# necessary libraries
library(plyr)
## Warning: package 'plyr' was built under R version 4.3.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyr)
library(stringr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# install.packages("fastmap")
# install.packages("skimr")
library(skimr)
## Warning: package 'skimr' was built under R version 4.3.3
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.3.2
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.2
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.1
library(readr)


dataset_all <- read_excel("Final Dataset.xlsx")
head(dataset_all)
## # A tibble: 6 × 49
##   Sr_No movie                year production_budget domestic_gross foreign_gross
##   <dbl> <chr>               <dbl>             <dbl>          <dbl>         <dbl>
## 1     0 Avatar               2009         425000000      760507625    2015837654
## 2     1 Pirates of the Car…  2011         410600000      241063875     804600000
## 3     2 Avengers: Age of U…  2015         330600000      459005868     944008095
## 4     3 Avengers: Infinity…  2018         300000000      678815482    1369318718
## 5     4 Justice League       2017         300000000      229024295     426920914
## 6     5 Justice League       2017         300000000      229024295     426920914
## # ℹ 43 more variables: worldwide_gross <dbl>, month <dbl>, profit <dbl>,
## #   profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## #   popularity <dbl>, release_date <chr>, original_language <chr>,
## #   vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Remove Column First

# Explicitly use dplyr to remove the column
dataset_all <- dplyr::select(dataset_all, -Sr_No)

We are going to do for year 2010 to 2018

# remove all other years 

# Filter the data to include only rows where year is between 2010 and 2018
data <- subset(dataset_all, year >= 2010 & year <= 2018)

# Display the filtered data
head(data)
## # A tibble: 6 × 48
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <dbl>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Justice …  2017         300000000      229024295     426920914       655945209
## 6 Spectre    2015         300000000      200074175     679546748       879620923
## # ℹ 42 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

CPI

# CPI data from 2010 to 2018, with 2018 as the base year
cpi_data <- data.frame(
  year = 2010:2018,
  cpi = c(218.1, 224.9, 229.6, 233, 236.7, 237, 240, 245.1, 251.1)
)

# Convert the 'year' column in 'data' to integer to match 'cpi_data'
data <- data %>%
  mutate(year = as.integer(year))

# Now perform the join
data <- data %>%
  left_join(cpi_data, by = "year")

# View the result
print(data)
## # A tibble: 1,702 × 49
##    movie     year production_budget domestic_gross foreign_gross worldwide_gross
##    <chr>    <int>             <dbl>          <dbl>         <dbl>           <dbl>
##  1 Pirates…  2011         410600000      241063875     804600000      1045663875
##  2 Avenger…  2015         330600000      459005868     944008095      1403013963
##  3 Avenger…  2018         300000000      678815482    1369318718      2048134200
##  4 Justice…  2017         300000000      229024295     426920914       655945209
##  5 Justice…  2017         300000000      229024295     426920914       655945209
##  6 Spectre   2015         300000000      200074175     679546748       879620923
##  7 Spectre   2015         300000000      200074175     679546748       879620923
##  8 The Dar…  2012         275000000      448139099     636300000      1084439099
##  9 Solo: A…  2018         275000000      213767512     179383835       393151347
## 10 The Lon…  2013         275000000       89302115     170700000       260002115
## # ℹ 1,692 more rows
## # ℹ 43 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Set base CPI (2018 CPI value)
base_cpi <- cpi_data$cpi[cpi_data$year == 2018]

# Adjust for inflation
data <- data %>%
  mutate(
    production_budget_adj = production_budget * (base_cpi / cpi),
    domestic_gross_adj = domestic_gross * (base_cpi / cpi),
    foreign_gross_adj = foreign_gross * (base_cpi / cpi),
    worldwide_gross_adj = worldwide_gross * (base_cpi / cpi),
    profit_adj = worldwide_gross_adj - production_budget_adj,
    roi_adj = (profit_adj / production_budget_adj) * 100
  )

head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Justice …  2017         300000000      229024295     426920914       655945209
## 6 Spectre    2015         300000000      200074175     679546748       879620923
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Inflation adjustments have been applied to financial data (e.g., budgets, gross revenues) using the CPI with 2018 as the base year. This ensures monetary comparisons are meaningful over time.

There are few dublicate observation.

# Identify and display the duplicate rows based on 'match_key'
duplicate_data <- data[duplicated(data$match_key) | duplicated(data$match_key, fromLast = TRUE), ]

# Display the duplicate rows
print(duplicate_data)
## # A tibble: 305 × 55
##    movie     year production_budget domestic_gross foreign_gross worldwide_gross
##    <chr>    <int>             <dbl>          <dbl>         <dbl>           <dbl>
##  1 Justice…  2017         300000000      229024295     426920914       655945209
##  2 Justice…  2017         300000000      229024295     426920914       655945209
##  3 Spectre   2015         300000000      200074175     679546748       879620923
##  4 Spectre   2015         300000000      200074175     679546748       879620923
##  5 Robin H…  2010         210000000      105487148     216971858       322459006
##  6 Robin H…  2010         210000000      105487148     216971858       322459006
##  7 Robin H…  2010         210000000      105487148     216971858       322459006
##  8 Robin H…  2010         210000000      105487148     216971858       322459006
##  9 Rogue O…  2016         200000000      532177324     516925532      1049102856
## 10 Rogue O…  2016         200000000      532177324     516925532      1049102856
## # ℹ 295 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove the duplicate rows, keeping only the first occurrence of each 'match_key'
data <- data[!duplicated(data$match_key), ]

# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

We are removing the observations that have 0 in ‘domestic_gross’ and ‘foreign_gross’

# Identify and display the rows where both 'domestic_gross' and 'foreign_gross' are 0
zero_gross_data <- subset(data, domestic_gross == 0 & foreign_gross == 0)

# Display the observations with both 'domestic_gross' and 'foreign_gross' as 0
print(zero_gross_data)
## # A tibble: 97 × 55
##    movie     year production_budget domestic_gross foreign_gross worldwide_gross
##    <chr>    <int>             <dbl>          <dbl>         <dbl>           <dbl>
##  1 Bright    2017          90000000              0             0               0
##  2 The Rid…  2015          60000000              0             0               0
##  3 The Fac…  2015          26000000              0             0               0
##  4 Dwegons…  2014          20000000              0             0               0
##  5 Fight V…  2016          20000000              0             0               0
##  6 Bird Box  2018          19800000              0             0               0
##  7 The Pri…  2014          18000000              0             0               0
##  8 Forsaken  2016          18000000              0             0               0
##  9 Drive H…  2014          12000000              0             0               0
## 10 Dancin'…  2015          12000000              0             0               0
## # ℹ 87 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(domestic_gross == 0 & foreign_gross == 0))

# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Removing observation which has ZERO in ‘domestic_gross’

# Identify and display the rows where 'domestic_gross' are 0
zero_dgross <- subset(data, domestic_gross == 0)

# Display the observations with both 'domestic_gross' and 'foreign_gross' as 0
print(zero_dgross)
## # A tibble: 40 × 55
##    movie     year production_budget domestic_gross foreign_gross worldwide_gross
##    <chr>    <int>             <dbl>          <dbl>         <dbl>           <dbl>
##  1 Air Str…  2018          65000000              0        516279          516279
##  2 The Lov…  2015          35000000              0         53899           53899
##  3 Konfere…  2010          30000000              0      53048539        53048539
##  4 Acciden…  2015          26000000              0        135436          135436
##  5 Ironclad  2011          25000000              0       5297411         5297411
##  6 Zambezia  2012          20000000              0      34454336        34454336
##  7 Survivor  2015          20000000              0       1703281         1703281
##  8 The Fro…  2013          19200000              0       5617460         5617460
##  9 I Am Wr…  2016          18000000              0        309608          309608
## 10 Wolves    2014          18000000              0         94953           94953
## # ℹ 30 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(domestic_gross == 0))

# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Removing observation which has ZERO in ‘foreign_gross’

# Identify and display the rows where 'foreign_gross' are 0
zero_fgross <- subset(data, foreign_gross == 0)

# Display the observations with both 'foreign_gross' 
print(zero_dgross)
## # A tibble: 40 × 55
##    movie     year production_budget domestic_gross foreign_gross worldwide_gross
##    <chr>    <int>             <dbl>          <dbl>         <dbl>           <dbl>
##  1 Air Str…  2018          65000000              0        516279          516279
##  2 The Lov…  2015          35000000              0         53899           53899
##  3 Konfere…  2010          30000000              0      53048539        53048539
##  4 Acciden…  2015          26000000              0        135436          135436
##  5 Ironclad  2011          25000000              0       5297411         5297411
##  6 Zambezia  2012          20000000              0      34454336        34454336
##  7 Survivor  2015          20000000              0       1703281         1703281
##  8 The Fro…  2013          19200000              0       5617460         5617460
##  9 I Am Wr…  2016          18000000              0        309608          309608
## 10 Wolves    2014          18000000              0         94953           94953
## # ℹ 30 more rows
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Remove those observations from the dataset
data <- subset(data, !(foreign_gross == 0))

# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Removing the none observation from the dataset

# Remove rows where genres is "none"
data <- data[data$genres != "none", ]

head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Removing the Not Rated observation from the MPAA Variable

# Remove rows where genres is "none"
data <- data[data$MPAA_Rating != "Not Rated", ]

head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

After cleaning the data we see that there is no movies in the variable named TV Movie.

# Remove the 'TV Movie' variable from the dataset
data <- data[ , !(names(data) %in% c("TV Movie"))]

# Display the first few rows of the cleaned data
head(data)
## # A tibble: 6 × 54
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 48 more variables: month <dbl>, profit <dbl>, profit_margin <chr>,
## #   roi <dbl>, pct_foreign <dbl>, match_key <chr>, popularity <dbl>,
## #   release_date <chr>, original_language <chr>, vote_average <dbl>,
## #   vote_count <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

New variables

Seasons

# Define a function to categorize months into seasons
get_season <- function(month) {
  if (month %in% c(12, 1, 2)) {
    return("Winter")
  } else if (month %in% c(3, 4, 5)) {
    return("Spring")
  } else if (month %in% c(6, 7, 8)) {
    return("Summer")
  } else if (month %in% c(9, 10, 11)) {
    return("Fall")
  } 
}

# Apply the function to the 'month' column to create the 'season' column
data$Seasons <- sapply(data$month, get_season)

# Convert 'season' to a factor for analysis
data$Seasons <- as.factor(data$Seasons)

# Reorder columns: Insert 'season' right after 'month'
month_index <- which(colnames(data) == "month")  # Find the index of the 'month' column
data <- data[, c(1:month_index, ncol(data), (month_index+1):(ncol(data)-1))]


# Check the first few rows to see the new 'season' column
head(data)
## # A tibble: 6 × 55
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 49 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## #   popularity <dbl>, release_date <chr>, original_language <chr>,
## #   vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Profit Adj Margin

# Calculate profit_adj_margin based on the adjusted profit and worldwide gross
data <- data %>%
  mutate(
    profit_adj_margin = profit_adj / worldwide_gross_adj)

Genre Count

# List of genre columns in the dataset
genre_columns <- c('Action', 'Adventure', 'Animation', 'Comedy', 'Crime', 
                   'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 
                   'Horror', 'Music', 'Mystery', 'Romance', 'Science Fiction', 
                   'Thriller', 'War', 'Western')

# Create a new column 'genre_count' which is the sum of the genre columns for each movie
data$genre_count <- rowSums(data[, genre_columns])

# Check the first few rows to see the new 'genre_count' column
head(data)
## # A tibble: 6 × 57
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 51 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## #   popularity <dbl>, release_date <chr>, original_language <chr>,
## #   vote_average <dbl>, vote_count <dbl>, genre_list <chr>, genres <chr>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, History <dbl>,
## #   Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Runtime Categories

# Categorize runtime into buckets
data <- data %>%
  mutate(Runtime_category = case_when(
    Runtime <= 90 ~ "Less than 90",
    Runtime > 90 & Runtime <= 135 ~ "90 to 135",
    Runtime > 135 ~ "Greater than 135"
  ))

# Convert to factor
data$Runtime_category <- as.factor(data$Runtime_category)

# Check if the transformation is correct
table(data$Runtime_category)
## 
##        90 to 135 Greater than 135     Less than 90 
##             1076               99              137

Vote Ratio

# Calculate the vote ratio and create a new column
data$vote_ratio <- data$vote_average / data$vote_count

# Find the index of the 'vote_count' column
vote_count_index <- which(colnames(data) == "vote_count")

# Reorder columns to place 'vote_ratio' right after 'vote_count'
data <- data[, c(1:vote_count_index, ncol(data), (vote_count_index+1):(ncol(data)-1))]

# Check the first few rows to see the new 'vote_ratio' column
head(data)
## # A tibble: 6 × 59
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 53 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_margin <chr>, roi <dbl>, pct_foreign <dbl>, match_key <chr>,
## #   popularity <dbl>, release_date <chr>, original_language <chr>,
## #   vote_average <dbl>, vote_count <dbl>, vote_ratio <dbl>, genre_list <chr>,
## #   genres <chr>, Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>,
## #   Crime <dbl>, Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>,
## #   History <dbl>, Horror <dbl>, Music <dbl>, Mystery <dbl>, Romance <dbl>, …

Profit Categories

# Define profit categories based on profit relative to production_budget_adj
data$profit_category <- with(data, ifelse(profit_adj <= 0, "Loss",
                                  ifelse(profit_adj <= production_budget_adj, "Break-even",
                                  ifelse(profit_adj <= production_budget_adj * 2, "Profitable",
                                  "Successfull"))))

# Find the index of the 'profit' column
profit_index <- which(colnames(data) == "profit")

# Reorder columns to place 'profit_category' right after 'profit'
data <- data[, c(1:profit_index, ncol(data), (profit_index+1):(ncol(data)-1))]

# View the distribution of movies in each category
table(data$profit_category)
## 
##  Break-even        Loss  Profitable Successfull 
##         273         261         248         530
# Optional: Calculate the percentage of movies in each category
prop.table(table(data$profit_category)) * 100
## 
##  Break-even        Loss  Profitable Successfull 
##    20.80793    19.89329    18.90244    40.39634
# Check the first few rows to verify the profit categories
head(data)
## # A tibble: 6 × 60
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 54 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, Action <dbl>,
## #   Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Creating the main Genre variable

# Extract the first genre from the 'genres_list' column and create a new column 'main_genres'
data$main_genres <- sapply(strsplit(data$genre_list, ", "), `[`, 1)

# Clean up the extra characters [' and ' from the 'main_genres'
data$main_genres <- gsub("\\[|\\]|'", "", data$main_genres)

# Find the index of the 'genres' column
genres_index <- which(colnames(data) == "genres")

# Reorder columns to place 'main_genres' right after 'genres'
data <- data[, c(1:genres_index, ncol(data), (genres_index+1):(ncol(data)-1))]

# Add "Main_" prefix to each category in 'main_genres'
data$main_genres <- paste("Main_", data$main_genres, sep = "")

# Display the first few rows to check the result
head(data)
## # A tibble: 6 × 61
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Pirates …  2011         410600000      241063875     804600000      1045663875
## 2 Avengers…  2015         330600000      459005868     944008095      1403013963
## 3 Avengers…  2018         300000000      678815482    1369318718      2048134200
## 4 Justice …  2017         300000000      229024295     426920914       655945209
## 5 Spectre    2015         300000000      200074175     679546748       879620923
## 6 The Dark…  2012         275000000      448139099     636300000      1084439099
## # ℹ 55 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <chr>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Giving them a binary variable

# List of genres to create binary variables for
genres <- c("Main_Action", "Main_Adventure", "Main_Animation", "Main_Comedy", "Main_Crime", 
            "Main_Documentary", "Main_Drama", "Main_Family", "Main_Fantasy", "Main_History", 
            "Main_Horror", "Main_Music", "Main_Mystery", "Main_Romance", "Main_Science Fiction", 
            "Main_Thriller", "Main_War", "Main_Western")

# Create binary variables for each genre
for (genre in genres) {
  data[[genre]] <- ifelse(data$main_genres == genre, 1, 0)
}

data$main_genres <- as.factor(as.character(data$main_genres))

# View the dataframe
summary(data$main_genres)
##          Main_Action       Main_Adventure       Main_Animation 
##                  247                   86                   46 
##          Main_Comedy           Main_Crime     Main_Documentary 
##                  239                   63                   10 
##           Main_Drama          Main_Family         Main_Fantasy 
##                  321                   17                   30 
##         Main_History          Main_Horror           Main_Music 
##                    9                   76                    5 
##         Main_Mystery         Main_Romance Main_Science Fiction 
##                   14                   32                   30 
##        Main_Thriller             Main_War         Main_Western 
##                   73                   12                    2
# Define the genres to include in "Other_Genres"
other_genres <- c("Main_Music", "Main_Western")

# Create the "Other_Genres" variable
data$Other_Genres <- ifelse(data$main_genres %in% other_genres, 1, 0)

# Ensure the main_genres column is correctly formatted
head(data$main_genres)
## [1] Main_Adventure Main_Action    Main_Adventure Main_Action    Main_Action   
## [6] Main_Action   
## 18 Levels: Main_Action Main_Adventure Main_Animation Main_Comedy ... Main_Western
head(data$Other_Genres)
## [1] 0 0 0 0 0 0
table(data$Other_Genres)
## 
##    0    1 
## 1305    7

Groups less frequent genres (e.g., Music, Western) into a separate binary variable. Reduces sparsity in genre categories for cleaner analysis and ensures rare genres are still captured.

Data Vis

Scatter Plot: Production Budget vs. Worldwide Gross

# Scatter plot
ggplot(data, aes(x = production_budget_adj, y = worldwide_gross_adj)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(title = "Production Budget vs Worldwide Gross", 
       x = "Production Budget (USD)", 
       y = "Worldwide Gross (USD)") +
  theme_minimal()

The scatter plot demonstrates a clear positive correlation between production budgets and worldwide gross, indicating that higher budgets generally lead to higher revenues. However, diminishing returns are observed for extremely high budgets (beyond $200 million), where revenue growth is less proportional. Notable outliers include some movies with exceptionally high profits relative to their budget, likely representing blockbusters, while others with high budgets but lower gross highlight potential losses or inefficiencies. This underscores the importance of strategic budget allocation, particularly in the mid-range budget segment, where outcomes are more variable and require further analysis by factors like genre or timing to optimize investments.

Bar Chart: Movie Profits by Year

# Bar chart of total profits per year
ggplot(data, aes(x = factor(year), y = profit_adj)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Total Movie Profits by Year", 
       x = "Year", 
       y = "Total Profit (USD)") +
  theme_minimal()

The bar chart illustrates yearly total adjusted profits from 2010 to 2018, showing a consistent trend with slight fluctuations. The overall profits remain relatively stable, with notable peaks in 2014 and 2016, indicating these years were particularly profitable for the film industry. Conversely, 2011 appears to have experienced slightly lower profits compared to other years. This stability suggests a resilient industry, but the peaks highlight years that likely benefited from a higher volume of successful movies or blockbuster releases. Further analysis of genres or seasons during peak years could provide insights into what drove these trends.

Histogram: Distribution of Production Budget

# Histogram for distribution of production budgets
ggplot(data, aes(x = production_budget_adj)) +
  geom_histogram(binwidth = 50000000, fill = "purple", color = "black") +
  labs(title = "Distribution of Production Budgets", 
       x = "Production Budget (USD)", 
       y = "Number of Movies") +
  theme_minimal()

The histogram reveals that most movies have relatively low production budgets, with the majority clustering below $50 million. A steep drop-off occurs as budgets increase, indicating fewer high-budget films. This suggests that the film industry predominantly operates in the low-to-mid budget range, while only a small fraction of movies receive substantial funding exceeding $200 million. These high-budget films are likely blockbusters or major studio productions, representing significant but rarer investments.

Density Plot: Domestic vs Foreign Gross

# Density plot for domestic and foreign gross
ggplot(data) +
  geom_density(aes(x = domestic_gross_adj, fill = "Domestic"), alpha = 0.5, color = "darkblue") +
  geom_density(aes(x = foreign_gross_adj, fill = "Foreign"), alpha = 0.5, color = "darkred") +
  labs(title = "Density Plot of Domestic vs Foreign Gross", 
       x = "Gross Earnings (USD)", 
       y = "Density") +
  theme_minimal()

The density plot compares the distribution of domestic and foreign gross earnings. Both distributions are heavily right-skewed, with the majority of movies earning less than $200 million in both markets. However, foreign gross shows a slightly wider spread, indicating that some movies perform exceptionally well internationally compared to domestic markets. This suggests the importance of targeting global audiences for maximizing revenue, especially for films with cross-cultural or international appeal.

Facet Grid: Profitability Across Seasons

library(ggplot2)

ggplot(data, aes(x = year, y = profit_adj, group = 1, color = Seasons)) +
  geom_line(size = 1) +
  facet_grid(. ~ Seasons) +
  labs(
    title = "Profitability Trends Across Seasons", 
    x = "Year", 
    y = "Profit (USD)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 12)  
  ) +
  scale_color_manual(
    values = c("Spring" = "darkgreen", "Summer" = "blue", "Fall" = "orange", "Winter" = "purple")
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Spring shows higher peaks in profitability compared to Summer in several years, suggesting that movies released in Spring are performing exceptionally well. Summer still shows consistent profitability across years but often trails Spring in terms of the highest profit spikes. Fall and Winter remain the least profitable seasons, with lower peaks and reduced variability. This suggests Spring might offer the most lucrative release opportunities, challenging the conventional dominance of Summer. Strategic scheduling in Spring could capitalize on this trend, especially for high-budget films.

Interactive Plot with Plotly: Production Budget vs Worldwide Gross

library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Create an interactive scatter plot
p <- ggplot(data, aes(x = production_budget_adj, y = worldwide_gross_adj, text = movie)) +
  geom_point(color = "blue", size = 3, alpha = 0.6) +
  labs(title = "Production Budget vs Worldwide Gross",
       x = "Production Budget (USD)",
       y = "Worldwide Gross (USD)")

# Make it interactive with plotly
ggplotly(p)

The interactive scatter plot showcases the relationship between production budget and worldwide gross, similar to the earlier static version. The interactive format adds value by allowing exploration of specific data points, such as identifying individual movies and outliers. The trend confirms a positive correlation, where higher budgets generally yield higher worldwide grosses, but diminishing returns become apparent at extreme budgets. The ability to hover over points for details facilitates deeper insight into standout performances, such as blockbuster hits or underperforming high-budget films, making this visualization particularly useful for identifying case studies or patterns in movie success.

Interactive Plot with Plotly: Production Budget vs Profit

# Create an interactive scatter plot
q <- ggplot(data, aes(x = production_budget_adj, y = profit_adj, text = movie)) +
  geom_point(color = "blue", size = 3, alpha = 0.6) +
  labs(title = "Production Budget vs profit",
       x = "Production Budget (USD)",
       y = "profit")

# Make it interactive with plotly
ggplotly(q)

The interactive scatter plot of production budget vs. profit highlights an interesting pattern. While higher budgets generally correlate with higher profits, there is significant variability. Some high-budget movies result in substantial profits, reinforcing their blockbuster success, while others show minimal or even negative profits, suggesting inefficient budget allocation or market underperformance. Lower-budget films often exhibit a more concentrated profit range but can still achieve impressive returns. The interactive feature allows the identification of specific movies that defy the trend, providing valuable insights into successful strategies or missteps in film investments.

Interactive Plot with Plotly: World Wide Gross Adj vs IMDB Rating

# Create an interactive scatter plot
r <- ggplot(data, aes(x = worldwide_gross_adj, y = IMDb_Rating, text = movie)) +
  geom_point(color = "blue", size = 3, alpha = 0.6) +
  labs(title = "Worldwide Gross Adj vs IMDb Rating",
       x = "Worldwide Gross Adj (USD)",
       y = "IMDb Rating")

# Make it interactive with plotly
ggplotly(r)

The interactive scatter plot of worldwide gross versus IMDb rating reveals an interesting relationship. While there is a slight positive trend indicating that higher-rated movies may achieve higher worldwide gross, the correlation is not particularly strong. Many films with average IMDb ratings (6-7) perform well in terms of gross revenue, likely due to factors like marketing, franchise power, or genre popularity. Outliers with high IMDb ratings but low gross highlight critically acclaimed but less commercially successful films, while some movies achieve high gross despite average ratings, likely due to broad audience appeal or established franchises. The interactive feature aids in pinpointing specific movies driving these trends.

Bar Plot: Average ROI by Profit Category

# Bar plot: Average ROI by Profit Category
library(ggplot2)

ggplot(data, aes(x = profit_category, y = roi_adj)) +
  stat_summary(fun = "mean", geom = "bar", fill = "skyblue") +
  labs(title = "Average ROI by Profit Category", x = "Profit Category", y = "ROI") +
  theme_minimal()

The bar plot of average ROI by profit category highlights that “Successful” movies have significantly higher average ROI compared to other categories, exceeding 600%, showcasing their exceptional financial returns. “Profitable” movies also exhibit decent ROI, while “Break-even” movies hover around zero. As expected, movies categorized as “Loss” demonstrate negative ROI. This emphasizes that targeting the “Successful” category, through careful selection of genres, budgets, and release strategies, is crucial for maximizing returns in film investments.

Box Plot: IMDb Rating by MPAA Rating

# Box plot: IMDb Rating by MPAA Rating 
library(ggplot2)


ggplot(data, aes(x = MPAA_Rating, y = IMDb_Rating, fill = MPAA_Rating)) +
  geom_boxplot() +
  labs(
    title = "IMDb Rating by MPAA Rating", 
    x = "MPAA Rating", 
    y = "IMDb Rating"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",  
    axis.text.x = element_text(face = "bold"),  
    plot.title = element_text(hjust = 0.5, face = "bold")  
  ) +
  scale_fill_manual(
    values = c("G" = "blue", "NC-17" = "red", "PG" = "green", "PG-13" = "purple", "R" = "orange")
  )

The box plot illustrates the distribution of IMDb ratings across different MPAA ratings. “G” (General Audience) movies show a slightly narrower range of ratings, with relatively high medians, indicating consistent quality. “NC-17” has very few observations, leading to a tight range and high median, but its insights may be less generalizable. “PG,” “PG-13,” and “R” ratings display broader variability in ratings, reflecting diverse audience preferences and content. Notably, the median IMDb ratings for “PG-13” and “R” films are comparable, showing these categories often target mature and broad audiences. The chart suggests that MPAA ratings influence the perceived quality but are not sole determinants of IMDb scores.

Heatmap: Genre Count vs. Runtime

# Install necessary library for heatmap
library(ggplot2)

# Prepare data for the heatmap: Runtime vs Genre Count
ggplot(data, aes(x = genre_count, y = Runtime, fill = ..count..)) +
  geom_bin2d(bins = 30) +  # Use 2D binning for better heatmap
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(title = "Heatmap: Genre Count vs Runtime", 
       x = "Number of Genres", 
       y = "Runtime (min)", 
       fill = "Count") +
  theme_minimal()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

The heatmap highlights the relationship between the number of genres a movie belongs to and its runtime. Most movies are concentrated around 1–2 genres with runtimes between 90–120 minutes, indicating that simpler, more focused genre classifications are common for typical feature-length films. As the number of genres increases, the distribution of runtimes becomes more varied, with longer runtimes often associated with multi-genre films. This pattern suggests that movies with more genres may require additional runtime to develop the complexity needed to appeal to broader audiences or to blend diverse storytelling elements effectively.

Heatmap: Genre Count vs. World Wide Gross Adj

# Prepare data for the heatmap: Genre Count vs World Wide Gross Adj
ggplot(data, aes(x = genre_count, y = worldwide_gross_adj, fill = ..count..)) +
  geom_bin2d(bins = 30) +  # Use 2D binning
  scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
  labs(title = "Heatmap: Genre Count vs World Wide Gross Adj", 
       x = "Number of Genres", 
       y = "World Wide Gross Adj", 
       fill = "Count") +
  theme_minimal()

The heatmap illustrates the relationship between the number of genres a movie has and its worldwide gross (adjusted). Movies with 1–2 genres dominate the dataset, with the majority grossing less than $200 million. However, movies with 3–4 genres appear more evenly distributed across higher gross ranges, suggesting that multi-genre films may appeal to broader audiences and achieve greater financial success. Films with 5 or more genres are rare but still show significant gross potential, indicating that complexity in genre blending can succeed if executed effectively. This pattern reinforces the idea that balancing genre diversity with audience targeting is key to maximizing revenue.

Heatmap: Genre Count vs. IMDb Rating

# Prepare data for the heatmap: Genre Count vs IMDb Rating
ggplot(data, aes(x = genre_count, y = IMDb_Rating, fill = ..count..)) +
  geom_bin2d(bins = 30) +  # Use 2D binning
  scale_fill_gradient(low = "lightcoral", high = "darkred") +
  labs(title = "Heatmap: Genre Count vs IMDb Rating", 
       x = "Number of Genres", 
       y = "IMDb Rating", 
       fill = "Count") +
  theme_minimal()

The heatmap shows the relationship between the number of genres in a movie and its IMDb rating. Movies with 1–2 genres dominate the dataset and tend to have IMDb ratings clustering around 6–7, indicating that simpler genre classifications are popular and generally well-received. Movies with 3–4 genres show a broader spread in IMDb ratings, with many achieving higher ratings (above 7.5), suggesting that multi-genre films can be more critically acclaimed when executed well. However, movies with more than 4 genres are fewer in count and exhibit greater variability in ratings, reflecting either niche appeal or difficulty in maintaining consistent quality across diverse genre blends.

Lollipop Plot: IMDb Rating by Director (Top 40)

# Limit the number of directors to top 40 based on IMDb Rating
top_directors <- data %>%
  arrange(desc(IMDb_Rating)) %>%
  slice_head(n = 40)

# Lollipop plot: IMDb Rating by Director (Top 40)
ggplot(top_directors, aes(x = reorder(Director, IMDb_Rating), y = IMDb_Rating)) +
  geom_segment(aes(xend = Director, yend = 0), color = "grey") +
  geom_point(color = "steelblue", size = 4) +
  coord_flip() +
  labs(title = "Top 40 IMDb Ratings by Director", x = "Director", y = "IMDb Rating") +
  theme_minimal(base_size = 12) +  # Increase base font size for readability
  theme(axis.text.y = element_text(size = 10))  # Adjust text size for director names

The lollipop plot highlights the top 40 directors ranked by their IMDb ratings.The visualization underscores the impact of directors in shaping highly-rated movies, which aligns with their established reputations for delivering quality content. This information is crucial for stakeholders considering partnerships or investments in directors for future projects.

Stacked Bar Plot: Profit Category by Main Genre

# Stacked bar plot: Profit Category by Main Genre
ggplot(data, aes(x = main_genres, fill = profit_category)) +
  geom_bar(position = "fill") +
  labs(title = "Profit Category Distribution Across Genres", x = "Main Genre", y = "Proportion") +
  theme_minimal()

The stacked bar plot illustrates the distribution of profit categories across main genres. Certain genres, such as Animation and Adventure, have a higher proportion of “Successful” films, indicating strong profitability in these categories. Conversely, genres like Documentary and Music exhibit a greater proportion of “Loss” and “Break-even” films, suggesting higher financial risks. Action and Comedy show balanced distributions, with a mix of “Profitable” and “Successful” outcomes, reflecting their broad audience appeal. This visualization highlights which genres are safer bets for investment and which ones carry higher financial variability, providing critical insights for optimizing film investments.

Dumbbell Plot: Runtime vs IMDb Rating by MPAA Rating

# Install necessary library for dumbbell plot
library(ggalt)
## Warning: package 'ggalt' was built under R version 4.3.3
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(dplyr)

# Calculate average IMDb Rating and Runtime for each MPAA Rating
mpaa_summary <- data %>%
  group_by(MPAA_Rating) %>%
  summarise(avg_runtime = mean(Runtime, na.rm = TRUE),
            avg_imdb = mean(IMDb_Rating, na.rm = TRUE))

# Dumbbell plot: Compare average Runtime and IMDb Rating by MPAA Rating
ggplot(mpaa_summary, aes(x = avg_runtime, xend = avg_imdb, y = MPAA_Rating)) +
  geom_dumbbell(color = "lightblue", size = 3) +
  labs(title = "Dumbbell Plot: Average Runtime vs IMDb Rating by MPAA Rating", 
       x = "Average Runtime (min) and IMDb Rating", y = "MPAA Rating") +
  theme_minimal(base_size = 12)
## Warning: Using the `size` aesthetic with geom_segment was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

The dumbbell plot provides a clearer comparison of the average runtime (left point) and IMDb rating (right point) for each MPAA rating category. The distances between the runtime and rating points are small, suggesting that runtime has little impact on IMDb ratings within these categories. Movies rated R and PG-13 maintain higher average runtimes, while those rated G and PG have shorter runtimes, reflecting their target audiences and simpler storylines. IMDb ratings, however, remain relatively uniform across all MPAA ratings, implying audience reception is less influenced by runtime or rating classification. This plot effectively highlights these relationships in a straightforward manner.

Sankey Diagram: Flow of Movies by Main Genre to Distributor (Top 10)

# Install necessary library for Sankey diagram
library(networkD3)
## Warning: package 'networkD3' was built under R version 4.3.3
library(dplyr)

# Count combinations of main_genres and Distributor
sankey_data <- as.data.frame(table(data$main_genres, data$Distributor))

# Rename the columns for clarity
colnames(sankey_data) <- c("main_genres", "Distributor", "n")

# Filter out rows where n is zero (optional, if needed)
sankey_data <- sankey_data[sankey_data$n > 0, ]



# Filter to include only the top 10 distributors to reduce clutter
top_distributors <- sankey_data %>%
  group_by(Distributor) %>%
  summarise(total = sum(n)) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

sankey_data_filtered <- sankey_data %>%
  filter(Distributor %in% top_distributors$Distributor)



# Create nodes and links for Sankey diagram
nodes <- unique(c(sankey_data_filtered$main_genres, sankey_data_filtered$Distributor))
sankey_links <- data.frame(source = match(sankey_data_filtered$main_genres, nodes) - 1,
                           target = match(sankey_data_filtered$Distributor, nodes) - 1,
                           value = sankey_data_filtered$n)


print(match(sankey_data_filtered$main_genres, nodes))
##   [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14  1  2  3  4  5  6  8  9 12 13 15
##  [26]  1  2  3  4  5  6  7 11 13  2  4  5  6 12 13  1  2  4  5  6  7  8  9 10 11
##  [51] 12 13  1  2  3  4  5 16  6  8  9 11 12 13 15  1  3  4  5  6  7 17  9 11  1
##  [76]  2  3  4  5  6  7  8 17  9 10 11 12 13 15  1  2  3  4  6  7  8 17  9 11 12
## [101] 13  1  2  3  4  5  6  7  8  9 18 10 11 12 13 15
# Create Sankey plot with better spacing and node sizing
sankeyNetwork(Links = sankey_links, Nodes = data.frame(name = nodes),
              Source = "source", Target = "target", Value = "value",
              NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
              nodePadding = 15)  # Increased node width and padding for clarity

The Sankey diagram reveals the flow of movies from their main genres to the top 10 distributors. Key players like Warner Bros. Pictures, Universal Pictures, and Walt Disney Studios Motion Pictures dominate the distribution of diverse genres, reflecting their broad market presence. Notably, Walt Disney Studios focuses primarily on Animation and Family genres, aligning with its family-friendly brand identity. On the other hand, distributors like Focus Features and Fox Searchlight Pictures cater to niche genres, indicating a targeted market approach. This visualization highlights strategic alignments between genres and distributors, providing insights for partnerships and market positioning in film investments.

Sankey Diagram: Flow of Movies by Main Genre to Production Company (Top 10 Production Companies)

# Install necessary libraries for Sankey diagram
library(networkD3)
library(dplyr)


# Count combinations of main_genres and Distributor
sankey_data_pc <- as.data.frame(table(data$main_genres, data$Production_Company))

# Rename the columns for clarity
colnames(sankey_data_pc) <- c("main_genres", "Production_Company", "n")

# Filter out rows where n is zero (optional, if needed)
sankey_data_pc <- sankey_data_pc[sankey_data_pc$n > 0, ]



# Filter to include only the top 10 production companies to reduce clutter
top_production_companies <- sankey_data_pc %>%
  group_by(Production_Company) %>%
  summarise(total = sum(n)) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

sankey_data_pc_filtered <- sankey_data_pc %>%
  filter(Production_Company %in% top_production_companies$Production_Company)

# Create nodes and links for Sankey diagram
nodes_pc <- unique(c(sankey_data_pc_filtered$main_genres, sankey_data_pc_filtered$Production_Company))
sankey_links_pc <- data.frame(source = match(sankey_data_pc_filtered$main_genres, nodes_pc) - 1,
                              target = match(sankey_data_pc_filtered$Production_Company, nodes_pc) - 1,
                              value = sankey_data_pc_filtered$n)

# Create Sankey plot for Main Genre to Production Company
sankeyNetwork(Links = sankey_links_pc, Nodes = data.frame(name = nodes_pc),
              Source = "source", Target = "target", Value = "value",
              NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
              nodePadding = 15)  # Increased node width and padding for clarity

The Sankey diagram illustrates the connection between main genres and the top 10 production companies. Walt Disney Pictures strongly dominates Animation and Family genres, aligning with its family-oriented brand image. DreamWorks Pictures and Village Roadshow Pictures demonstrate versatility, engaging with multiple genres such as Action, Drama, and Comedy. Companies like Blumhouse Productions and Screen Gems show focused specialization in Horror and Thriller, reflecting their niche strategies. This visualization highlights the production companies’ genre preferences, offering insights into which companies to approach for collaborations based on genre alignment.

Sankey Diagram: Flow of Movies by Main Genre to Profit Category

# Install necessary libraries for Sankey diagram
library(networkD3)
library(dplyr)


# Count combinations of main_genres and Distributor
sankey_data_profit <- as.data.frame(table(data$main_genres, data$profit_category))

# Rename the columns for clarity
colnames(sankey_data_profit) <- c("main_genres", "profit_category", "n")

# Filter out rows where n is zero (optional, if needed)
sankey_data_profit <- sankey_data_profit[sankey_data_profit$n > 0, ]






# Filter to include only the top 10 profit categories to reduce clutter
top_profit_categories <- sankey_data_profit %>%
  group_by(profit_category) %>%
  summarise(total = sum(n)) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

sankey_data_profit_filtered <- sankey_data_profit %>%
  filter(profit_category %in% top_profit_categories$profit_category)

# Create nodes and links for Sankey diagram
nodes_profit <- unique(c(sankey_data_profit_filtered$main_genres, sankey_data_profit_filtered$profit_category))
sankey_links_profit <- data.frame(source = match(sankey_data_profit_filtered$main_genres, nodes_profit) - 1,
                                  target = match(sankey_data_profit_filtered$profit_category, nodes_profit) - 1,
                                  value = sankey_data_profit_filtered$n)

# Create Sankey plot for Main Genre to Profit Category
sankeyNetwork(Links = sankey_links_profit, Nodes = data.frame(name = nodes_profit),
              Source = "source", Target = "target", Value = "value",
              NodeID = "name", units = "Movies", fontSize = 14, nodeWidth = 50,
              nodePadding = 15)  # Increased node width and padding for clarity

The Sankey diagram illustrates the flow of movies from their main genres to profit categories. Genres like Animation, Adventure, and Family show a stronger connection to the “Successful” and “Profitable” categories, reflecting their ability to generate higher returns. Conversely, genres such as Documentary, History, and Mystery are more associated with “Loss” and “Break-even,” indicating greater financial risks. This visualization highlights which genres consistently drive profitability and which ones tend to underperform, providing valuable insights for optimizing investments based on genre-specific profit trends.

Density Ridgeline Plot: IMDb Rating by Main Genre

library(ggridges)
## Warning: package 'ggridges' was built under R version 4.3.2
# Ridgeline plot: IMDb Rating by Main Genre
ggplot(data, aes(x = IMDb_Rating, y = main_genres, fill = main_genres)) +
  geom_density_ridges(scale = 3, rel_min_height = 0.01) +
  labs(title = "Density Ridgeline Plot: IMDb Rating by Main Genre",
       x = "IMDb Rating", y = "Main Genre") +
  theme_ridges() +
  theme(legend.position = "none")
## Picking joint bandwidth of 0.355

The density ridgeline plot provides a comparison of IMDb ratings across different main genres. It shows the distribution and concentration of IMDb ratings for each genre, highlighting patterns of audience reception. For instance, genres such as Drama, Adventure, and Animation have a strong central tendency towards higher ratings (around 7-8), reflecting consistent audience approval. On the other hand, Documentary and Horror genres display wider variations, indicating diverse audience reception. This plot helps identify which genres generally achieve higher critical acclaim, aiding in strategic decisions about genre focus.

Analyzing Genre Preferences and Profitability

# Load necessary libraries
library(dplyr)
library(ggplot2)

# Calculate average ROI and audience satisfaction by genre
genre_roi <- data %>%
  group_by(main_genres) %>%
  summarize(
    avg_roi = mean(roi_adj, na.rm = TRUE),
    avg_rating = mean(IMDb_Rating, na.rm = TRUE)
  )

# Plot ROI by genre
ggplot(genre_roi, aes(x = reorder(main_genres, avg_roi), y = avg_roi)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip() +
  labs(title = "Average ROI by Genre", x = "Genre", y = "Average ROI")

# Plot audience satisfaction by genre
ggplot(genre_roi, aes(x = reorder(main_genres, avg_rating), y = avg_rating)) +
  geom_bar(stat = "identity", fill = "green") +
  coord_flip() +
  labs(title = "Average Audience Satisfaction by Genre", x = "Genre", y = "Average Rating")

Average ROI by Genre: The blue bar chart highlights that genres such as Horror, Mystery, and Thriller tend to achieve the highest returns on investment (ROI). This suggests these genres are cost-efficient and capable of generating significant profits compared to their production budgets, possibly due to lower production costs or niche audience appeal.

Average Audience Satisfaction by Genre: The green chart reveals that genres such as Western, History, and Animation receive the highest average IMDb ratings, indicating strong audience satisfaction. However, genres like Horror and Mystery, despite high ROIs, have lower average ratings, showing a potential gap between profitability and viewer approval.

Evaluating Budget and Its Impact on ROI

# Relationship between budget and ROI
ggplot(data, aes(x = production_budget_adj, y = roi_adj)) +
  geom_point() +
  geom_smooth(method = "lm", col = "red") +
  labs(title = "Production Budget vs. ROI", x = "Production Budget", y = "ROI")
## `geom_smooth()` using formula = 'y ~ x'

# Group movies by budget categories (low, medium, high)
data <- data %>%
  mutate(budget_category = case_when(
    production_budget_adj < 25000000 ~ "Low",
    production_budget_adj >= 25000000 & production_budget_adj < 75000000 ~ "Medium",
    TRUE ~ "High"
  ))

# Calculate ROI and profit margin by budget category
budget_roi <- data %>%
  group_by(budget_category) %>%
  summarize(
    avg_roi = mean(roi_adj, na.rm = TRUE),
    avg_profit_adj_margin = mean(profit_adj_margin, na.rm = TRUE)
  )

# Plot ROI by budget category
ggplot(budget_roi, aes(x = budget_category, y = avg_roi)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Average ROI by Budget Category", x = "Budget Category", y = "Average ROI")

Scatter Plot (Production Budget vs. ROI): Movies with lower production budgets tend to show a much higher variation in ROI, with some extremely high outliers. The trend suggests a diminishing ROI as production budgets increase, indicating that lower-budget films might achieve greater profitability proportional to their cost.

Bar Chart (Average ROI by Budget Category): Low-budget movies exhibit the highest average ROI, far outperforming medium and high-budget movies. This could be due to their lower costs making even modest profits appear substantial in percentage terms. Medium and high-budget movies show relatively stable and comparable ROIs, though lower than low-budget films.

Optimizing Release Timing

# Analyze seasonality effect on profitability
season_roi <- data %>%
  group_by(Seasons) %>%
  summarize(
    avg_profit = mean(profit_adj, na.rm = TRUE),
    avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE)
  )

# Plot average profit by season
ggplot(season_roi, aes(x = reorder(Seasons, avg_profit), y = avg_profit)) +
  geom_bar(stat = "identity", fill = "purple") +
  labs(title = "Average Profit by Season", x = "Season", y = "Average Profit")

# Plot average ROI by season
ggplot(season_roi, aes(x = reorder(Seasons, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
  geom_bar(stat = "identity", fill = "orange") +
  labs(title = "Average Worldwide Gross Adj by Season", x = "Season", y = "Average Worldwide Gross Adj")

The first plot, “Average Profit by Season,” indicates a clear seasonal trend in profits. Summer and Spring seasons emerge as the most profitable periods, with Summer leading slightly. Winter follows, with Fall trailing as the least profitable season. This pattern suggests that movies released during the Spring and Summer seasons benefit from higher profitability, likely due to increased audience turnout during holidays and favorable weather conditions.

The second plot, “Average Worldwide Gross Adj by Season,” aligns closely with the profit analysis. It shows that Spring and Summer seasons not only yield higher profits but also generate significantly higher worldwide gross earnings. This consistency underscores the importance of timing movie releases to maximize box office revenue. In contrast, Fall, with the lowest average gross and profit, may reflect reduced audience engagement during this season.

Analyzing Target Audience and MPAA Rating Impact

# Audience satisfaction and MPAA rating
rating_satisfaction <- data %>%
  group_by(MPAA_Rating) %>%
  summarize(
    avg_rating = mean(Critic_score, na.rm = TRUE),
    avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE)
  )

# Plot audience satisfaction by MPAA rating
ggplot(rating_satisfaction, aes(x = reorder(MPAA_Rating, avg_rating), y = avg_rating)) +
  geom_bar(stat = "identity", fill = "green") +
  labs(title = "Audience Satisfaction by MPAA Rating", x = "MPAA Rating", y = "Average Rating")

# Plot ROI by MPAA rating
ggplot(rating_satisfaction, aes(x = reorder(MPAA_Rating, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Worldwide Gross Adj by MPAA Rating", x = "MPAA Rating", y = "Average Worldwide Gross Adj")

The analysis of MPAA ratings provides valuable insights into how movie ratings influence audience satisfaction and revenue generation. The first bar chart, displaying “Audience Satisfaction by MPAA Rating,” highlights that NC-17 rated movies achieve the highest average critic scores. However, movies rated PG and G also perform well in terms of audience satisfaction. This suggests that while NC-17 movies may cater to a niche audience with high approval, family-friendly ratings such as PG and G enjoy broader acceptance.

The second chart, “Worldwide Gross Adj by MPAA Rating,” reveals a contrasting trend. Movies with a G rating generate the highest worldwide gross, indicating their appeal to a universal audience, often including children and families. PG-rated movies follow closely, further solidifying the dominance of family-oriented content in terms of financial performance. Meanwhile, NC-17 movies, despite their high ratings, have minimal financial returns, likely due to limited audience reach and distribution challenges. This underscores the importance of balancing creative vision with market accessibility when deciding on an MPAA rating for a film.

Exploring the Relationship Between Runtime and Profitability

# Relationship between Runtime and ROI
ggplot(data, aes(x = Runtime, y = worldwide_gross_adj)) +
  geom_point() +
  geom_smooth(method = "lm", col = "blue") +
  labs(title = "Runtime vs. worldwide Gross Adj", x = "Runtime (min)", y = "Worldwide Gross Adj")
## `geom_smooth()` using formula = 'y ~ x'

# Relationship between Runtime and Audience Rating
ggplot(data, aes(x = Runtime, y = IMDb_Rating)) +
  geom_point() +
  geom_smooth(method = "lm", col = "green") +
  labs(title = "Runtime vs. IMDb Rating", x = "Runtime (min)", y = "Audience Rating")
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)
library(plotly)

# Create scatter plot with text and regression line for Runtime vs IMDb Rating
a <- ggplot(data, aes(x = Runtime, y = IMDb_Rating, text = movie)) +
  geom_point(color = "blue", size = 2, alpha = 0.7) +  # Scatter points
  geom_smooth(method = "lm", col = "green", se = TRUE) +  # Linear regression line
  labs(title = "Runtime vs. IMDb Rating", 
       x = "Runtime (min)", 
       y = "Audience Rating") +
  theme_minimal()

# Convert the ggplot object to an interactive plot using plotly
ggplotly(a)
## `geom_smooth()` using formula = 'y ~ x'

Runtime vs. Worldwide Gross Adjusted: The scatter plot indicates a positive linear trend, suggesting that longer movies tend to generate higher worldwide gross revenue (adjusted). However, the data also demonstrates a significant variance, particularly for shorter runtimes, where some movies achieve high revenues despite shorter durations.

Runtime vs. IMDb Rating: This scatter plot, complemented by a regression line, highlights a moderate positive correlation between runtime and audience rating. Longer movies slightly correlate with better ratings, although exceptions exist. Notably, the density of points for mid-length runtimes (around 100-120 minutes) suggests they are most common and generally well-received.

Interactive Runtime vs. IMDb Rating: The interactive version of the second plot enhances user engagement by allowing movie-specific exploration. This feature helps identify outliers or specific movies with unusually high or low ratings relative to their runtimes.

Genre Combinations and Their Effect on Profitability

# Number of genres associated with each movie and its impact on ROI
ggplot(data, aes(x = genre_count, y = worldwide_gross_adj)) +
  geom_point() +
  geom_smooth(method = "lm", col = "red") +
  labs(title = "Number of Genres vs. Worldwide Gross Adj", x = "Number of Genres", y = "Worldwide Gross Adj")
## `geom_smooth()` using formula = 'y ~ x'

# Cross-tabulation of main genres and Worldwide Gross Adj
genre_worldgross <- data %>%
  group_by(main_genres) %>%
  summarize(avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE))

The visualization illustrates the relationship between the number of genres in a movie and its adjusted worldwide gross. While the linear regression line suggests a slight positive trend, the variation within each genre count highlights that profitability is not solely dictated by the number of genres but also by other factors such as quality, audience appeal, and marketing. This indicates that while combining genres might attract a broader audience, successful execution remains critical. Further analysis of average worldwide gross by specific genres could provide deeper insights into which genres consistently drive higher profitability, regardless of the number of genres involved.

Investigating Director and Cast Influence

# Analyzing Director's impact on ROI
director_worldgross <- data %>%
  group_by(Director) %>%
  summarize(
    avg_worldwide_gross_adj = mean(worldwide_gross_adj, na.rm = TRUE),
    avg_rating = mean(IMDb_Rating, na.rm = TRUE)
  )

# Top 10 Directors by ROI
top_directors <- director_worldgross %>%
  arrange(desc(avg_worldwide_gross_adj)) %>%
  head(25)

# Plotting Top 10 Directors by ROI
ggplot(top_directors, aes(x = reorder(Director, avg_worldwide_gross_adj), y = avg_worldwide_gross_adj)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip() +
  labs(title = "Top 10 Directors by Worldwide Gross Adj", x = "Director", y = "Average Worldwide Gross Adj")

The chart highlights the top directors ranked by their average worldwide gross adjusted, showcasing their significant influence on movie profitability. These results underline the pivotal role a director plays in a film’s financial success, as their vision, storytelling, and ability to assemble talented teams directly impact a movie’s market appeal. Additionally, the diversity in the list reflects a variety of genres and collaborative styles that can lead to financial triumph in the global market.

Analyzing Critic vs. Audience Reception

# Relationship between critic score and audience rating
ggplot(data, aes(x = Critic_score, y = IMDb_Rating)) +
  geom_point() +
  geom_smooth(method = "lm", col = "purple") +
  labs(title = "Critic Score vs. Audience Rating", x = "Critic Score", y = "Audience Rating")
## `geom_smooth()` using formula = 'y ~ x'

The scatterplot illustrates the relationship between critic scores and audience ratings, revealing a positive correlation. As critic scores increase, there is a noticeable upward trend in audience ratings, indicating that movies with higher critic approval tend to resonate well with audiences. This alignment suggests that critical acclaim often translates into audience appreciation, emphasizing the role of critics in shaping viewer perceptions and the potential market reception of a film.

Calculating Risk (Variance) in worldwide gross adjusted

# Variance in Worldwide Gross Adj by Genre
genre_risk <- data %>%
  group_by(main_genres) %>%
  summarize(worldwide_gross_adj_variance = var(worldwide_gross_adj, na.rm = TRUE))

# Plot Variance in Worldwide Gross Adj by Genre
ggplot(genre_risk, aes(x = reorder(main_genres, worldwide_gross_adj_variance), y = worldwide_gross_adj_variance)) +
  geom_bar(stat = "identity", fill = "red") +
  coord_flip() +
  labs(title = "Variance in Worldwide Gross Adj by Genre", x = "Genre", y = "Variance in Worldwide Gross Adj")

The bar chart illustrates the variance in worldwide gross adjusted revenue across different movie genres, offering insight into the financial risks associated with each genre. Genres like “Family” and “Science Fiction” exhibit the highest variance, suggesting significant inconsistency in revenue outcomes—likely influenced by a mix of high-grossing blockbusters and underperforming titles within these categories. In contrast, genres such as “Documentary” and “Music” display minimal variance, indicating more stable but lower revenue performance. These patterns emphasize the trade-off between potential high returns and financial risk when investing in genres with high revenue variability.

Comparing Profitability by MPAA Rating Over Time

# Grouping by MPAA Rating and Year to see trends over time
mpaa_yearly_roi <- data %>%
  group_by(year, MPAA_Rating) %>%
  summarize(avg_roi = mean(roi_adj, na.rm = TRUE))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Plotting MPAA Rating Trends over time
ggplot(mpaa_yearly_roi, aes(x = year, y = avg_roi, color = MPAA_Rating)) +
  geom_line() +
  labs(title = "MPAA Rating ROI Trends Over Time", x = "Year", y = "Average ROI")

The line chart illustrates the return on investment (ROI) trends over time across different MPAA ratings, providing insight into how profitability varies for movies with different content classifications. “R-rated” movies display significant fluctuations, peaking in certain years, which could be tied to the success of specific high-grossing films. On the other hand, “G” and “PG” rated films maintain relatively steady, albeit lower, ROI levels, reflecting consistent but modest performance. The “PG-13” category, representing a broad audience appeal, shows stable trends with occasional increases, suggesting its reliability as a profitable rating. Such patterns emphasize how audience reach and movie content restrictions interplay to influence financial success.

Production Budget Dynamics

# Visualizing the distribution of production budgets
library(ggplot2)
ggplot(data, aes(x = production_budget_adj)) +
  geom_histogram(bins = 30, fill = "blue", alpha = 0.7) +
  labs(title = "Distribution of Production Budgets", x = "Production Budget", y = "Count")

# Check correlation between numerical variables and production budget
cor(data[,c("production_budget_adj", "Runtime", "IMDb_Rating")], use = "complete.obs")
##                       production_budget_adj   Runtime IMDb_Rating
## production_budget_adj             1.0000000 0.3658046   0.1363438
## Runtime                           0.3658046 1.0000000   0.4030555
## IMDb_Rating                       0.1363438 0.4030555   1.0000000
# Boxplot of production budget by genre
ggplot(data, aes(x = main_genres, y = production_budget_adj)) +
  geom_boxplot(fill = "lightblue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Production Budget by Genre", x = "Main Genre", y = "Production Budget")

The histogram showcasing the distribution of production budgets reveals a right-skewed pattern, with the majority of films clustering in the lower budget range, typically under $50 million. This indicates that most films are produced with modest budgets, likely catering to niche audiences or focusing on smaller-scale storytelling. However, the presence of a long tail highlights a smaller but significant number of high-budget productions, reflecting the blockbuster trend in genres like Action and Science Fiction, where budgets can exceed hundreds of millions to accommodate expansive visual effects and global marketing campaigns.

The boxplot examining production budgets by genre emphasizes the financial disparities across different categories. High-budget genres like Action, Adventure, and Science Fiction stand out with median budgets significantly above other genres, driven by their reliance on cutting-edge technology, extensive special effects, and high-profile cast. On the other hand, genres like Documentary, Comedy, and Horror demonstrate lower budget medians, suggesting they can achieve success through minimalistic setups, character-driven plots, and smaller production teams. These insights indicate that genre choice is a pivotal factor in determining the scale of financial investment required for a film.

Outliers

summary(data$worldwide_gross_adj)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 2.896e+04 2.145e+07 7.299e+07 1.689e+08 1.934e+08 2.048e+09
# Calculate Q1, Q3, and IQR
Q1 <- quantile(data$worldwide_gross_adj, 0.25, na.rm = TRUE)
Q3 <- quantile(data$worldwide_gross_adj, 0.75, na.rm = TRUE)
IQR <- Q3 - Q1

# Define lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR

# Count the number of outliers
num_outliers <- sum(data$worldwide_gross_adj < lower_bound | data$worldwide_gross_adj > upper_bound, na.rm = TRUE)

# Print the number of outliers
cat("Number of outliers in worldwide_gross_adj:", num_outliers, "\n")
## Number of outliers in worldwide_gross_adj: 128
# Filter data to remove outliers
data <- data %>%
  filter(worldwide_gross_adj >= lower_bound & worldwide_gross_adj <= upper_bound)

# Verify removal of outliers
summary(data$worldwide_gross_adj)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     28956  18269598  60446550  98272452 141985106 448808221

Outliers often represent anomalies such as exceptional blockbusters or poorly performing movies. Removing these extremes helps in ensuring that further analysis reflects the general trends and avoids being skewed by a few extreme cases. However, this decision should be balanced based on the goals of your analysis—outliers can also provide valuable insights into exceptional cases. If the focus is on the typical behavior of movies, outlier removal is beneficial. For analysis of blockbuster success patterns, those outliers might warrant separate study.

summary(data$main_genres)
##          Main_Action       Main_Adventure       Main_Animation 
##                  196                   60                   28 
##          Main_Comedy           Main_Crime     Main_Documentary 
##                  236                   63                   10 
##           Main_Drama          Main_Family         Main_Fantasy 
##                  315                   12                   26 
##         Main_History          Main_Horror           Main_Music 
##                    9                   75                    5 
##         Main_Mystery         Main_Romance Main_Science Fiction 
##                   14                   31                   21 
##        Main_Thriller             Main_War         Main_Western 
##                   72                   10                    1

Performing log to worldwide gross

# Apply log transformation to the scaled variable and create a new column
data$Log_worldwide_gross_adj <- log(data$worldwide_gross_adj)

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 82
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 76 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …
# Summary of the log-transformed variable
summary(data$Log_worldwide_gross_adj)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.27   16.72   17.92   17.56   18.77   19.92

We are converting the World wide Gross Adj in the Log World wide Gross adj so that the range become less in dependent variable.

Creating the dummies variables for Runtime catagory

# Define the runtime categories (excluding the reference category)
time_cat <- c("90 to 135", "Greater than 135")

# Loop through each runtime category to create binary variables
for (category in time_cat) {
  data[[category]] <- ifelse(data$Runtime_category == category, 1, 0)
}

# "Greater than 135" is automatically the reference category

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 84
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 78 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Created the dummies variables for the for the Runtime catagories So we can get the proper results when we drive more deep in the analysis part.

Creating the dummies variables for Seasons

# List of seasons to create binary variables for
Seasons <- c("Spring", "Summer", "Fall")

# Create binary variables for each season
for (season in Seasons) {
  data[[season]] <- ifelse(data$Seasons == season, 1, 0)
}

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 87
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 81 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Created the dummies variables for the for the Seasons catagories So we can get the proper results when we drive more deep in the analysis part.

Creating the dummies variables for MPAA Rating

# List of seasons to create binary variables for
ratings <- c("PG-13" ,"R","PG" ,"G")

# Create binary variables for each season
for (rating in ratings) {
  data[[rating]] <- ifelse(data$MPAA_Rating == rating, 1, 0)
}

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 91
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 85 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Created the dummies variables for the for the MPAA Rating catagories So we can get the proper results when we drive more deep in the analysis part.

Creating the dummies variables for Genres count

# List of seasons to create binary variables for
Counts <- c("1" ,"2","3" ,"4" ,"5", "6")

# Create binary variables for each season
for (Count in Counts) {
  data[[Count]] <- ifelse(data$genre_count == Count, 1, 0)
}

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 97
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 91 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

Created the dummies variables for the for the Genre Count catagories So we can get the proper results when we drive more deep in the analysis part.

which so ever dummies we have made we took 1 of it catagories as a refereance when we will running the models.

doing log to Production budget

# Apply log transformation to Worldwide_Gross_Adj and create a new column
data$Log_production_budget_adj <- log(data$production_budget_adj)

# View the first few rows of the dataframe
head(data)
## # A tibble: 6 × 98
##   movie      year production_budget domestic_gross foreign_gross worldwide_gross
##   <chr>     <int>             <dbl>          <dbl>         <dbl>           <dbl>
## 1 Solo: A …  2018         275000000      213767512     179383835       393151347
## 2 The Lone…  2013         275000000       89302115     170700000       260002115
## 3 John Car…  2012         275000000       73058679     209719421       282778100
## 4 Battlesh…  2012         220000000       65233400     248244317       313477717
## 5 Robin Ho…  2010         210000000      105487148     216971858       322459006
## 6 Green La…  2011         200000000      116601172     102934320       219535492
## # ℹ 92 more variables: month <dbl>, Seasons <fct>, profit <dbl>,
## #   profit_category <chr>, profit_margin <chr>, roi <dbl>, pct_foreign <dbl>,
## #   match_key <chr>, popularity <dbl>, release_date <chr>,
## #   original_language <chr>, vote_average <dbl>, vote_count <dbl>,
## #   vote_ratio <dbl>, genre_list <chr>, genres <chr>, main_genres <fct>,
## #   Action <dbl>, Adventure <dbl>, Animation <dbl>, Comedy <dbl>, Crime <dbl>,
## #   Documentary <dbl>, Drama <dbl>, Family <dbl>, Fantasy <dbl>, …

We are converting the World wide Gross Adj in the Log World wide Gross adj so that the range become less.

Changing the names so it don’t give errors later

# Rename the column
colnames(data)[colnames(data) == "Main_Science Fiction"] <- "Main_Science_Fiction"
colnames(data)[colnames(data) == "PG-13"] <- "PG.13"
colnames(data)[colnames(data) == "90 to 135"] <- "between_90_to_135"
colnames(data)[colnames(data) == "Greater than 135"] <- "Greater_than_135"

# Replace "PG-13" with "PG.13" in the MPAA_Rating column
data$MPAA_Rating <- gsub("PG-13", "PG.13", data$MPAA_Rating)
data$MPAA_Rating <- gsub("NC-17", "NC.17", data$MPAA_Rating)
# Check the unique values in the column to confirm the change
unique(data$MPAA_Rating)
## [1] "PG.13" "PG"    "R"     "G"     "NC.17"
data$main_genres <- gsub("Science Fiction", "Science_Fiction", data$main_genres)
unique(data$main_genres)
##  [1] "Main_Action"          "Main_Thriller"        "Main_Adventure"      
##  [4] "Main_Fantasy"         "Main_Drama"           "Main_Science_Fiction"
##  [7] "Main_Comedy"          "Main_Horror"          "Main_Animation"      
## [10] "Main_Family"          "Main_Crime"           "Main_War"            
## [13] "Main_History"         "Main_Music"           "Main_Documentary"    
## [16] "Main_Mystery"         "Main_Romance"         "Main_Western"
data$Runtime_category <- gsub("90 to 135", "90.to.135", data$Runtime_category)
data$Runtime_category <- gsub("Greater than 135", "Greater.than.135", data$Runtime_category)
data$Runtime_category <- gsub("Less than 90", "Less.than.90", data$Runtime_category)
unique(data$Runtime_category)
## [1] "90.to.135"        "Greater.than.135" "Less.than.90"
colnames(data)[colnames(data) == "Science Fiction"] <- "Science_Fiction"

# Check if the column has been renamed
colnames(data)
##  [1] "movie"                     "year"                     
##  [3] "production_budget"         "domestic_gross"           
##  [5] "foreign_gross"             "worldwide_gross"          
##  [7] "month"                     "Seasons"                  
##  [9] "profit"                    "profit_category"          
## [11] "profit_margin"             "roi"                      
## [13] "pct_foreign"               "match_key"                
## [15] "popularity"                "release_date"             
## [17] "original_language"         "vote_average"             
## [19] "vote_count"                "vote_ratio"               
## [21] "genre_list"                "genres"                   
## [23] "main_genres"               "Action"                   
## [25] "Adventure"                 "Animation"                
## [27] "Comedy"                    "Crime"                    
## [29] "Documentary"               "Drama"                    
## [31] "Family"                    "Fantasy"                  
## [33] "History"                   "Horror"                   
## [35] "Music"                     "Mystery"                  
## [37] "Romance"                   "Science_Fiction"          
## [39] "Thriller"                  "War"                      
## [41] "Western"                   "Production_Company"       
## [43] "IMDb_Rating"               "Distributor"              
## [45] "Director"                  "Cast"                     
## [47] "Producer"                  "Screenwriter"             
## [49] "MPAA_Rating"               "Runtime"                  
## [51] "Critic_score"              "cpi"                      
## [53] "production_budget_adj"     "domestic_gross_adj"       
## [55] "foreign_gross_adj"         "worldwide_gross_adj"      
## [57] "profit_adj"                "roi_adj"                  
## [59] "profit_adj_margin"         "genre_count"              
## [61] "Runtime_category"          "Main_Action"              
## [63] "Main_Adventure"            "Main_Animation"           
## [65] "Main_Comedy"               "Main_Crime"               
## [67] "Main_Documentary"          "Main_Drama"               
## [69] "Main_Family"               "Main_Fantasy"             
## [71] "Main_History"              "Main_Horror"              
## [73] "Main_Music"                "Main_Mystery"             
## [75] "Main_Romance"              "Main_Science_Fiction"     
## [77] "Main_Thriller"             "Main_War"                 
## [79] "Main_Western"              "Other_Genres"             
## [81] "budget_category"           "Log_worldwide_gross_adj"  
## [83] "between_90_to_135"         "Greater_than_135"         
## [85] "Spring"                    "Summer"                   
## [87] "Fall"                      "PG.13"                    
## [89] "R"                         "PG"                       
## [91] "G"                         "1"                        
## [93] "2"                         "3"                        
## [95] "4"                         "5"                        
## [97] "6"                         "Log_production_budget_adj"

Log-Transformed Distributions: Worldwide Gross and Production Budget

hist(data$Log_worldwide_gross_adj, main = "worldwide gross adj", breaks = 50)

hist(log(data$Log_production_budget_adj + 1), main = "Log-Transformed Budget", breaks = 50)

The histogram for the log-transformed worldwide gross (Log_worldwide_gross_adj) shows a right-skewed distribution that becomes more normalized after applying the log transformation. The data clusters between the values of 14 and 18, suggesting that most movies have an adjusted gross value concentrated within this range. This transformation helps reduce the impact of extreme outliers in the gross values, enabling a better understanding of the central tendencies and variance within the dataset.

For the log-transformed production budget (Log_production_budget_adj), the histogram also indicates a right-skewed distribution that becomes more symmetrical after transformation. The values predominantly range between 2.7 and 3.0, reflecting that most movies have production budgets within a smaller, more consistent range post-logarithmic scaling. This log transformation mitigates the high variance typically associated with production budgets, making it easier to compare across movies.

worldwide catagory

# Categorize Log_worldwide_gross_adj into buckets
data <- data %>%
  mutate(Log_Worldwide_Gross_Category = case_when(
    Log_worldwide_gross_adj <= 17.19 ~ "Low's",
    Log_worldwide_gross_adj > 17.19 & Log_worldwide_gross_adj <= 18.46 ~ "Medium",
    Log_worldwide_gross_adj > 18.46 ~ "High's"
  ))

# Convert to factor
data$Log_Worldwide_Gross_Category <- as.factor(data$Log_Worldwide_Gross_Category)

# Check if the transformation is correct
table(data$Log_Worldwide_Gross_Category)
## 
## High's  Low's Medium 
##    394    395    395

These categories are based on the quantile-based thresholds defined as follows:

  1. “Low’s”: Represents movies with a log-transformed worldwide gross adjusted value of less than or equal to 17.19. This category includes 395 movies.
  2. “Medium”: Represents movies with values between 17.19 and 18.46. This range contains 395 movies as well.
  3. “High’s”: Represents movies with values greater than 18.46. This bucket also has 394 movies.

By converting the categorized variable into a factor, it ensures that the categories are treated as distinct, non-numeric labels in any subsequent analysis. The categorization results in nearly equal distribution across the three groups, which is useful for comparative analyses across different gross levels.

Multinomial Logistic Regression with Cross-Validation of whole data

# Load required libraries
library(caret)
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: lattice
# Prepare the dataset
set.seed(123)  # For reproducibility
data <- as.data.frame(data)

# Define the control method for cross-validation
cv_control <- trainControl(
  method = "cv",         # Cross-validation method
  number = 5,            # Number of folds
  verboseIter = TRUE,    # Display progress
  savePredictions = TRUE # Save predictions for analysis
)

# Load required library
library(caret)
library(nnet)

# Define cross-validation control
cv_control <- trainControl(method = "cv", number = 10) # 10-fold cross-validation

# Train a multinomial logistic regression model
cv_model <- train(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + Main_Action +   Main_Adventure +    Main_Animation +    Main_Comedy +   Main_Crime +    Main_Documentary +  Main_Drama +    Main_Family +   Main_Fantasy +  Main_Horror  +  Main_Mystery +  Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = data,          # Dataset
  method = "multinom",            # Multinomial logistic regression
  trControl = cv_control          # Cross-validation control
)
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 990.595506
## iter  20 value 865.409943
## iter  30 value 836.250687
## iter  40 value 827.050334
## iter  50 value 826.280091
## iter  60 value 826.136525
## final  value 826.134017 
## converged
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 997.655452
## iter  20 value 924.236974
## iter  30 value 916.048289
## iter  40 value 915.836630
## final  value 915.834463 
## converged
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 990.603056
## iter  20 value 865.508858
## iter  30 value 836.436249
## iter  40 value 827.307871
## iter  50 value 826.551587
## iter  60 value 826.442214
## final  value 826.441395 
## converged
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 983.955118
## iter  20 value 869.200988
## iter  30 value 838.300509
## iter  40 value 832.404507
## iter  50 value 831.232362
## iter  60 value 831.061220
## final  value 831.059683 
## converged
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 992.236018
## iter  20 value 922.946508
## iter  30 value 915.271488
## iter  40 value 914.969517
## final  value 914.966578 
## converged
## # weights:  81 (52 variable)
## initial  value 1168.923475 
## iter  10 value 983.964016
## iter  20 value 869.302155
## iter  30 value 838.468071
## iter  40 value 832.614392
## iter  50 value 831.480797
## iter  60 value 831.335441
## final  value 831.333251 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 982.516075
## iter  20 value 863.480514
## iter  30 value 828.438313
## iter  40 value 821.458346
## iter  50 value 818.118478
## iter  60 value 817.905595
## final  value 817.903700 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 991.971384
## iter  20 value 918.029818
## iter  30 value 910.415086
## iter  40 value 910.251032
## iter  40 value 910.251026
## iter  40 value 910.251026
## final  value 910.251026 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 982.526229
## iter  20 value 863.586290
## iter  30 value 828.623542
## iter  40 value 821.700656
## iter  50 value 818.405318
## iter  60 value 818.241964
## iter  70 value 818.235454
## final  value 818.234914 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 985.240681
## iter  20 value 886.637121
## iter  30 value 851.831980
## iter  40 value 843.243837
## iter  50 value 842.614085
## iter  60 value 842.499891
## final  value 842.497309 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 993.608153
## iter  20 value 935.114138
## iter  30 value 924.018424
## iter  40 value 923.812523
## final  value 923.808274 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 985.249689
## iter  20 value 886.720714
## iter  30 value 851.994244
## iter  40 value 843.466456
## iter  50 value 842.855811
## iter  60 value 842.776989
## final  value 842.773090 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 985.395430
## iter  20 value 875.835531
## iter  30 value 835.027372
## iter  40 value 826.638501
## iter  50 value 825.882214
## iter  60 value 825.670889
## iter  70 value 825.668305
## iter  70 value 825.668300
## iter  70 value 825.668300
## final  value 825.668300 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 993.258511
## iter  20 value 926.342264
## iter  30 value 915.752675
## iter  40 value 915.541075
## final  value 915.538111 
## converged
## # weights:  81 (52 variable)
## initial  value 1172.219312 
## iter  10 value 985.403874
## iter  20 value 875.929795
## iter  30 value 835.218393
## iter  40 value 826.898327
## iter  50 value 826.159455
## iter  60 value 825.999527
## final  value 825.994860 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 993.927219
## iter  20 value 887.351427
## iter  30 value 858.297717
## iter  40 value 850.912507
## iter  50 value 850.293543
## iter  60 value 850.166990
## final  value 850.166034 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 1000.561888
## iter  20 value 937.208742
## iter  30 value 928.964041
## iter  40 value 928.767831
## final  value 928.762134 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 993.934294
## iter  20 value 887.445791
## iter  30 value 858.457932
## iter  40 value 851.124361
## iter  50 value 850.523963
## iter  60 value 850.430698
## final  value 850.430261 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 977.988497
## iter  20 value 870.717450
## iter  30 value 833.277343
## iter  40 value 825.207127
## iter  50 value 824.500245
## iter  60 value 824.396372
## final  value 824.394643 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 986.189054
## iter  20 value 919.454502
## iter  30 value 910.382375
## iter  40 value 910.221738
## final  value 910.219392 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 977.997297
## iter  20 value 870.795948
## iter  30 value 833.456326
## iter  40 value 825.444541
## iter  50 value 824.756099
## iter  60 value 824.683704
## final  value 824.682467 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 984.020599
## iter  20 value 870.212115
## iter  30 value 839.413658
## iter  40 value 830.939967
## iter  50 value 830.245282
## iter  60 value 830.108396
## final  value 830.106525 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 991.631916
## iter  20 value 926.035454
## iter  30 value 916.472420
## iter  40 value 916.251685
## final  value 916.246646 
## converged
## # weights:  81 (52 variable)
## initial  value 1170.022087 
## iter  10 value 984.028719
## iter  20 value 870.307638
## iter  30 value 839.590233
## iter  40 value 831.178181
## iter  50 value 830.501950
## iter  60 value 830.399026
## final  value 830.398356 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 978.007042
## iter  20 value 881.715118
## iter  30 value 839.572736
## iter  40 value 831.307508
## iter  50 value 830.558397
## iter  60 value 830.429768
## final  value 830.427644 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 986.957990
## iter  20 value 924.830437
## iter  30 value 916.176136
## iter  40 value 915.999626
## final  value 915.996273 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 978.016593
## iter  20 value 881.796486
## iter  30 value 839.744252
## iter  40 value 831.533956
## iter  50 value 830.807341
## iter  60 value 830.712747
## final  value 830.712306 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 994.182148
## iter  20 value 890.746755
## iter  30 value 849.367936
## iter  40 value 839.654161
## iter  50 value 839.004303
## iter  60 value 838.869090
## final  value 838.866806 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 1001.068932
## iter  20 value 933.536824
## iter  30 value 925.017021
## iter  40 value 924.848206
## final  value 924.845957 
## converged
## # weights:  81 (52 variable)
## initial  value 1171.120700 
## iter  10 value 994.189508
## iter  20 value 890.832889
## iter  30 value 849.550745
## iter  40 value 839.892618
## iter  50 value 839.260620
## iter  60 value 839.164056
## final  value 839.161381 
## converged
## # weights:  81 (52 variable)
## initial  value 1300.756950 
## iter  10 value 1091.651296
## iter  20 value 970.787455
## iter  30 value 937.277836
## iter  40 value 928.208085
## iter  50 value 927.291351
## iter  60 value 927.167785
## final  value 927.166727 
## converged
# View model results
print(cv_model)
## Penalized Multinomial Regression 
## 
## 1184 samples
##   25 predictor
##    3 classes: 'High's', 'Low's', 'Medium' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1064, 1064, 1066, 1067, 1067, 1066, ... 
## Resampling results across tuning parameters:
## 
##   decay  Accuracy   Kappa    
##   0e+00  0.6175184  0.4264002
##   1e-04  0.6175184  0.4264002
##   1e-01  0.5836751  0.3756410
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was decay = 1e-04.
# Check cross-validation metrics
cv_results <- cv_model$results
print(cv_results)
##   decay  Accuracy     Kappa AccuracySD    KappaSD
## 1 0e+00 0.6175184 0.4264002 0.05269619 0.07907112
## 2 1e-04 0.6175184 0.4264002 0.05269619 0.07907112
## 3 1e-01 0.5836751 0.3756410 0.03719189 0.05591128
# Check variable importance (if available)
if ("varImp" %in% methods("train")) {
  var_imp <- varImp(cv_model, scale = FALSE)
  print(var_imp)
  plot(var_imp)
}

The multinomial logistic regression model was trained to predict Log_Worldwide_Gross_Category using predictors such as the log-transformed production budget, runtime categories, seasonal releases, MPAA ratings, genre counts, and specific genres. Evaluated with 10-fold cross-validation, the model achieved its highest accuracy of 61.75% with a decay value of 0.0001, accompanied by a Kappa statistic of 0.426, indicating moderate agreement between predictions and true values. Increased regularization (decay = 0.1) reduced accuracy to 58.37% and Kappa to 0.375, demonstrating that higher penalization adversely affects performance. The optimal model, selected at a decay of 0.0001, balances accuracy and complexity effectively. These results highlight the moderate predictive power of the model, suggesting potential for improvement by incorporating additional features or exploring alternative approaches.

Spliting the data

# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet)         # For Ridge and LASSO regression
## Warning: package 'glmnet' was built under R version 4.3.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.3.1
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-8
library(randomForest)    # For Random Forest model
## Warning: package 'randomForest' was built under R version 4.3.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(xgboost)         # For Gradient Boosting model
## Warning: package 'xgboost' was built under R version 4.3.3
## 
## Attaching package: 'xgboost'
## The following object is masked from 'package:plotly':
## 
##     slice
## The following object is masked from 'package:dplyr':
## 
##     slice
library(Metrics)         # For evaluation metrics
## Warning: package 'Metrics' was built under R version 4.3.3
## 
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
## 
##     precision, recall
set.seed(123)  # For reproducibility

# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]

Multinomial Logistic Regession

# Load the required library
library(nnet)

# Fit the multinomial logistic regression model
multinom_model <- multinom(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
  data = train_data
)
## # weights:  84 (54 variable)
## initial  value 909.650975 
## iter  10 value 750.668060
## iter  20 value 679.033321
## iter  30 value 655.419161
## iter  40 value 647.812756
## iter  50 value 646.629405
## iter  60 value 646.572253
## iter  70 value 646.549992
## final  value 646.549600 
## converged
# View model summary
summary(multinom_model)
## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## multinom(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj + 
##     PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
##     data = train_data)
## 
## Coefficients:
##        (Intercept) Log_production_budget_adj      PG.13         R         PG
## Low's     54.02857                 -2.463531 -10.329502 -9.621212 -10.805911
## Medium    18.34683                 -1.387259   6.766761  7.029339   6.214177
##                 G between_90_to_135 Greater_than_135     Spring     Summer
## Low's  -60.990149         0.3013782      -0.09367208  0.2540701 -0.2641861
## Medium   7.020628        -0.1343854      -0.26478885 -0.1769784 -0.1774896
##              Fall genre_count Main_Action Main_Adventure Main_Animation
## Low's  -0.1130515   0.1934268   -2.913081      -1.912802      -2.165656
## Medium -0.1741066   0.2124052   -1.412170      -1.120253      -1.399071
##        Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Low's    -2.764042  -2.379240        -2.011616  -2.376101   -2.853453
## Medium   -1.132786  -0.948553        -1.020207  -1.181713   -0.500758
##        Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Low's     -3.443417   -5.360688    -4.408292    -1.725017           -3.5826158
## Medium    -1.512193   -2.384847    -1.206547     0.182188           -0.7490445
##        Main_Thriller Main_History
## Low's      -3.178588    -1.509629
## Medium     -1.541316   -37.452230
## 
## Std. Errors:
##        (Intercept) Log_production_budget_adj     PG.13         R        PG
## Low's     2.666312                 0.1877811 0.9094408 0.9018218 0.9492838
## Medium    2.500942                 0.1600283 0.6805335 0.6723222 0.6934545
##               G between_90_to_135 Greater_than_135    Spring    Summer
## Low's       NaN         0.4597054        0.7195260 0.3515061 0.3614150
## Medium 1.099029         0.3781664        0.5465227 0.2879162 0.2830429
##             Fall genre_count Main_Action Main_Adventure Main_Animation
## Low's  0.3332468   0.1337842    1.412635       1.511721       1.703619
## Medium 0.2712032   0.1094383    1.349323       1.397321       1.492783
##        Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Low's     1.417273   1.469423         2.354495   1.407818    1.930024
## Medium    1.359308   1.401000         2.142253   1.354289    1.590654
##        Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Low's      1.903964    1.498765     1.951964     1.631140             1.738463
## Medium     1.516519    1.408728     1.568679     1.542214             1.518612
##        Main_Thriller Main_History
## Low's       1.460582 1.992374e+00
## Medium      1.395043 2.262602e-14
## 
## Residual Deviance: 1293.099 
## AIC: 1401.099

The multinomial logistic regression was applied to classify the Log_Worldwide_Gross_Category into “High’s,” “Medium,” and “Low’s” based on predictors such as Log_production_budget_adj, runtime categories, season categories, MPAA ratings, and main genres. The model converged after 70 iterations with a residual deviance of 1293.099 and an AIC of 1401.099, indicating the model’s fit.

Key insights from the coefficients include:

Production Budget: Higher Log_production_budget_adj negatively impacts the probability of being classified in “Low’s” and “Medium” categories compared to the reference “High’s.” Genre Influence: Specific genres like Main_Horror and Main_Fantasy show strong negative associations with “Low’s” and “Medium” categories, suggesting these genres are less likely to result in low revenue. Runtime and Season Effects: The categories like “between 90 to 135 minutes” and Summer season do not show significant effects, as the coefficients are close to zero with high standard errors, indicating potential variability in their impact. MPAA Ratings: Ratings such as PG-13 and PG have strong associations, with PG-13 reducing the likelihood of belonging to the “Low’s” category. The standard errors for some coefficients are notably high, particularly for rare categories such as Main_History and Main_Documentary, which could indicate instability due to smaller representation in the dataset. Overall, the model provides reasonable predictions but may require additional refinement or regularization for improved generalizability.

Evaluation of Multinomial Logistic Regression Model

library(pROC)
## Warning: package 'pROC' was built under R version 4.3.2
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:Metrics':
## 
##     auc
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")

# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
  data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
  reference = factor(test_data$Log_Worldwide_Gross_Category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     84     6     35
##     Low's      10    81     29
##     Medium     29    27     55
## 
## Overall Statistics
##                                           
##                Accuracy : 0.618           
##                  95% CI : (0.5653, 0.6687)
##     No Information Rate : 0.3455          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4268          
##                                           
##  Mcnemar's Test P-Value : 0.6517          
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.6829       0.7105        0.4622
## Specificity                 0.8240       0.8388        0.7637
## Pos Pred Value              0.6720       0.6750        0.4955
## Neg Pred Value              0.8312       0.8602        0.7388
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2360       0.2275        0.1545
## Detection Prevalence        0.3511       0.3371        0.3118
## Balanced Accuracy           0.7535       0.7747        0.6129
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)

for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
  predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
  
  # ROC Curve
  roc_obj_test <- roc(true_binary, predicted_probs)
  roc_list_test[[category]] <- roc_obj_test
  auc_list_test[[category]] <- auc(roc_obj_test)
  
  # Plot ROC Curve for this class
  plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's on Test Data: 0.8448306
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Low's on Test Data: 0.8601747
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Medium on Test Data: 0.6920009
# Lift Chart for each category
for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
  
  # Calculate Lift
  lift_table_test <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Log_Worldwide_Gross_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
    )
  
  # Plot Lift Chart for the current category
  plot(
    lift_table_test$decile, lift_table_test$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category, "on Test Data")
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}

Interpretation of Multinomial Logistic Regression Model with Lift Charts and ROC Analysis The multinomial logistic regression model shows an overall accuracy of 61.8% with a Kappa statistic of 0.4268, which indicates moderate agreement between predicted and observed categories. The model’s performance varies by class, with the sensitivity for “High’s” at 68.29%, “Low’s” at 71.05%, and “Medium” at 46.22%. The specificity values suggest the model can reasonably distinguish between classes, with “High’s” and “Low’s” achieving 82.4% and 83.88%, respectively. The positive predictive values (PPVs) indicate that predictions for “High’s” and “Low’s” are more reliable compared to “Medium,” which has lower sensitivity and PPV.

The ROC analysis reveals that the model performs best for “Low’s” with an AUC of 0.86, followed by “High’s” with 0.8448, and “Medium” lagging at 0.692. This suggests the model is more effective at classifying “High’s” and “Low’s” compared to “Medium,” where there is room for improvement. The lift charts for “High’s,” “Low’s,” and “Medium” classes indicate that the model’s cumulative gains are consistent with a random classifier, suggesting that its ability to rank predictions might not be significantly better than random.

While the model shows reasonable discrimination for “High’s” and “Low’s,” the “Medium” category presents a challenge due to its lower balanced accuracy and AUC. This discrepancy might stem from overlapping features between “Medium” and the other categories, requiring further feature engineering or exploration of alternative algorithms. Overall, the model is moderately effective but could benefit from refinement, especially in handling the “Medium” class predictions.

Random Forest

# Load the required library
library(randomForest)

# Train Random Forest Model for Classification
rf_model <- randomForest(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  ntree = 500,        # Number of trees
  mtry = 5,           # Number of predictors randomly selected at each split
  importance = TRUE,  # Calculate variable importance
  proximity = TRUE    # Enable proximity matrix for better insights
)

# View the model summary
print(rf_model)
## 
## Call:
##  randomForest(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj +      PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +      Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +      Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +      Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +      Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,      data = train_data, ntree = 500, mtry = 5, importance = TRUE,      proximity = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 42.27%
## Confusion matrix:
##        High's Low's Medium class.error
## High's    174    27     70   0.3579336
## Low's      25   189     67   0.3274021
## Medium     73    88    115   0.5833333
# Feature Importance
importance(rf_model)
##                                High's      Low's     Medium
## Log_production_budget_adj 54.23404066 58.3964024 16.6099620
## PG.13                      7.23391587  5.8022759  0.1428070
## R                         10.66811130  8.4856570 -0.6918324
## PG                         9.58714900  3.8205036 -2.7065644
## G                         -1.95595955  4.3647583  0.6583091
## between_90_to_135          3.45872879 -3.9251462  1.0197417
## Greater_than_135           5.41925679  6.1085477  2.0768173
## Spring                    -2.22544249  3.2519120 -0.2645075
## Summer                    -0.40877224  2.4932487 -2.8785964
## Fall                      -1.86660551  2.9856875  2.8811094
## genre_count                8.28023430  3.6306634 -0.6670277
## Main_Action               13.74783553 10.7553413 -1.8980100
## Main_Adventure             4.98002373  3.9423308 -1.2316089
## Main_Animation             5.36253608 -0.1803412 -4.8961871
## Main_Comedy                6.71286257  2.1285906 -3.2960200
## Main_Crime                 2.30217102  1.1251912 -1.2319112
## Main_Documentary           1.29123649  3.5155053 -2.3727848
## Main_Drama                 7.28313497  6.0722752 -4.5410771
## Main_Family               -4.68116187 -3.4223355 -1.6401665
## Main_Fantasy              -1.79537739 -0.5137727 -2.3141812
## Main_Horror                3.55771593 -7.2675268  3.3127806
## Main_Mystery              -3.21265432  3.1600207  2.5766902
## Main_History              -0.01441111  1.6539941  4.8726468
## Main_Romance               8.32859461  1.1203048 -0.3211193
## Main_Science_Fiction       1.69248262 -4.2542209  2.8594548
## Main_Thriller              3.53457829  3.0926812 -4.4366351
##                           MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj           66.5823632      161.6311816
## PG.13                                9.4199603        7.0313273
## R                                   12.5720032       11.9967875
## PG                                   8.3055515        6.5648208
## G                                    0.9334809        1.2709370
## between_90_to_135                    0.3034235        9.3321449
## Greater_than_135                     7.8985043        4.8645679
## Spring                               0.7873428        9.3040851
## Summer                              -0.4483334        9.3873777
## Fall                                 3.1305884        9.3192971
## genre_count                          6.5357738       28.0454828
## Main_Action                         14.6850374       10.5656676
## Main_Adventure                       4.8134016        4.0267356
## Main_Animation                       2.2254825        1.8529323
## Main_Comedy                          2.9902262        7.0837292
## Main_Crime                           1.0450720        4.5569799
## Main_Documentary                     1.9491483        1.4695295
## Main_Drama                           6.1428315        9.3212568
## Main_Family                         -5.6971242        0.9853271
## Main_Fantasy                        -2.8793950        2.0103055
## Main_Horror                         -0.9534465        6.5876137
## Main_Mystery                         1.8732446        2.5944616
## Main_History                         2.7409357        2.1725445
## Main_Romance                         4.8958916        4.8982248
## Main_Science_Fiction                 0.2847141        2.6531996
## Main_Thriller                        1.0071114        5.4797721
varImpPlot(rf_model) # Plot variable importance

The Random Forest model for classifying the “Log_Worldwide_Gross_Category” demonstrates an overall out-of-bag (OOB) error rate of 42.27%, indicating that the model struggles to classify certain instances accurately. The class-specific errors reveal that “High’s” and “Low’s” are classified with relatively lower error rates of 35.79% and 32.74%, respectively, whereas the “Medium” category has a much higher error rate of 58.33%. This disparity highlights the model’s difficulty in distinguishing “Medium” instances, likely due to feature overlap with other categories.

The variable importance plot reveals that “Log_production_budget_adj” is the most significant predictor, contributing the most to both accuracy and the Gini impurity reduction. Other notable predictors include “genre_count,” “Main_Action,” “Greater_than_135,” and “Main_Romance.” These variables are likely strong indicators of the differences between the gross categories. Conversely, features such as “Main_Fantasy,” “Main_Family,” and “Main_Animation” contribute less, suggesting they may be less relevant for this classification task.

Despite its ability to leverage multiple predictors and provide insights into feature importance, the Random Forest model exhibits moderate classification performance.

Evaluation of the Random Forest Model

# Predict on the test data
test_data$rf_predicted_categories <- predict(rf_model, newdata = test_data)

# Confusion Matrix
rf_confusion_matrix_test <- confusionMatrix(
  data = factor(test_data$rf_predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
  reference = factor(test_data$Log_Worldwide_Gross_Category)
)
print(rf_confusion_matrix_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     85    10     34
##     Low's      12    77     36
##     Medium     26    27     49
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5927          
##                  95% CI : (0.5397, 0.6442)
##     No Information Rate : 0.3455          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.389           
##                                           
##  Mcnemar's Test P-Value : 0.4691          
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.6911       0.6754        0.4118
## Specificity                 0.8112       0.8017        0.7764
## Pos Pred Value              0.6589       0.6160        0.4804
## Neg Pred Value              0.8326       0.8398        0.7244
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2388       0.2163        0.1376
## Detection Prevalence        0.3624       0.3511        0.2865
## Balanced Accuracy           0.7511       0.7385        0.5941
# ROC Curve and AUC for each class
rf_roc_list_test <- list()
rf_auc_list_test <- list()

for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
  rf_predicted_probs <- predict(rf_model, newdata = test_data, type = "prob")[, category]
  
  # ROC Curve
  rf_roc_obj_test <- roc(true_binary, rf_predicted_probs)
  rf_roc_list_test[[category]] <- rf_roc_obj_test
  rf_auc_list_test[[category]] <- auc(rf_roc_obj_test)
  
  # Plot ROC Curve for this class
  plot(rf_roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "red")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, "on Test Data:", rf_auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's on Test Data: 0.8363341
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Low's on Test Data: 0.8235283
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Medium on Test Data: 0.6586888
# Lift Chart for each category
for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predict(rf_model, newdata = test_data, type = "prob")[, category]))

  # Add deciles for the chosen category
  test_data$rf_decile <- ntile(predict(rf_model, newdata = test_data, type = "prob")[, category], 10)
  
  # Calculate Lift
  rf_lift_table_test <- test_data %>%
    group_by(rf_decile) %>%
    summarize(
      total = n(),
      events = sum(Log_Worldwide_Gross_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
    )
  
  # Plot Lift Chart for the current category
  plot(
    rf_lift_table_test$rf_decile, rf_lift_table_test$cumulative_percentage,
    type = "o", col = "red", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category, "on Test Data")
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}

The random forest model for classifying “Log_Worldwide_Gross_Category” achieved an overall accuracy of 59.27% on the test data with a Kappa statistic of 0.389, indicating moderate agreement beyond chance. The AUC values for “High’s,” “Low’s,” and “Medium” categories were 0.836, 0.824, and 0.659, respectively. These AUC values suggest the model performs well for distinguishing “High’s” and “Low’s” but struggles with “Medium” predictions.

From the confusion matrix, the sensitivity (true positive rate) was highest for “High’s” (69.11%), followed by “Low’s” (67.54%) and “Medium” (41.18%). Specificity (true negative rate) was consistently higher across classes, indicating the model’s strength in identifying negative cases. The lift charts show relatively stable cumulative gains across deciles for all categories, which suggests modest differentiation capability for high-probability deciles. However, the random forest exhibited challenges in perfectly separating “Medium” due to overlapping class probabilities, as reflected in the lower sensitivity and AUC for this category.

The random forest model demonstrates reliable performance for the “High’s” and “Low’s” categories. The variable importance plot highlights “Log_production_budget_adj” and “genre_count” as the top predictors, emphasizing their significance in determining the gross revenue category.

XGBoost

# Load necessary libraries
library(xgboost)

# Prepare data for XGBoost
x_train <- model.matrix(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data
)[, -1] # Remove intercept column

# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$Log_Worldwide_Gross_Category) - 1

# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
  data = as.matrix(x_train),
  label = y_train,
  objective = "multi:softprob",  # Multi-class classification
  num_class = length(levels(train_data$Log_Worldwide_Gross_Category)), # Number of classes
  nrounds = 100,                 # Number of boosting rounds
  max_depth = 6,                 # Tree depth
  eta = 0.1,                     # Learning rate
  colsample_bytree = 0.8,        # Subsample ratio of columns
  verbose = 1                    # Print training progress
)
## [1]  train-mlogloss:1.044219 
## [2]  train-mlogloss:1.014600 
## [3]  train-mlogloss:0.981189 
## [4]  train-mlogloss:0.951196 
## [5]  train-mlogloss:0.923517 
## [6]  train-mlogloss:0.889304 
## [7]  train-mlogloss:0.860176 
## [8]  train-mlogloss:0.842175 
## [9]  train-mlogloss:0.822921 
## [10] train-mlogloss:0.800008 
## [11] train-mlogloss:0.780153 
## [12] train-mlogloss:0.766195 
## [13] train-mlogloss:0.748585 
## [14] train-mlogloss:0.735677 
## [15] train-mlogloss:0.720366 
## [16] train-mlogloss:0.710524 
## [17] train-mlogloss:0.697816 
## [18] train-mlogloss:0.688281 
## [19] train-mlogloss:0.677437 
## [20] train-mlogloss:0.667779 
## [21] train-mlogloss:0.662752 
## [22] train-mlogloss:0.656260 
## [23] train-mlogloss:0.647030 
## [24] train-mlogloss:0.638480 
## [25] train-mlogloss:0.630084 
## [26] train-mlogloss:0.622255 
## [27] train-mlogloss:0.615014 
## [28] train-mlogloss:0.608129 
## [29] train-mlogloss:0.603235 
## [30] train-mlogloss:0.596009 
## [31] train-mlogloss:0.591830 
## [32] train-mlogloss:0.585533 
## [33] train-mlogloss:0.581276 
## [34] train-mlogloss:0.577733 
## [35] train-mlogloss:0.574517 
## [36] train-mlogloss:0.571123 
## [37] train-mlogloss:0.566720 
## [38] train-mlogloss:0.563134 
## [39] train-mlogloss:0.559574 
## [40] train-mlogloss:0.555446 
## [41] train-mlogloss:0.551674 
## [42] train-mlogloss:0.547738 
## [43] train-mlogloss:0.543440 
## [44] train-mlogloss:0.539785 
## [45] train-mlogloss:0.536030 
## [46] train-mlogloss:0.533126 
## [47] train-mlogloss:0.530018 
## [48] train-mlogloss:0.528901 
## [49] train-mlogloss:0.526127 
## [50] train-mlogloss:0.523214 
## [51] train-mlogloss:0.521795 
## [52] train-mlogloss:0.519638 
## [53] train-mlogloss:0.517398 
## [54] train-mlogloss:0.515160 
## [55] train-mlogloss:0.512116 
## [56] train-mlogloss:0.510168 
## [57] train-mlogloss:0.508645 
## [58] train-mlogloss:0.506789 
## [59] train-mlogloss:0.504495 
## [60] train-mlogloss:0.502816 
## [61] train-mlogloss:0.501197 
## [62] train-mlogloss:0.499407 
## [63] train-mlogloss:0.498239 
## [64] train-mlogloss:0.496254 
## [65] train-mlogloss:0.494156 
## [66] train-mlogloss:0.492314 
## [67] train-mlogloss:0.489519 
## [68] train-mlogloss:0.487633 
## [69] train-mlogloss:0.485190 
## [70] train-mlogloss:0.483455 
## [71] train-mlogloss:0.482276 
## [72] train-mlogloss:0.480786 
## [73] train-mlogloss:0.479804 
## [74] train-mlogloss:0.478515 
## [75] train-mlogloss:0.476099 
## [76] train-mlogloss:0.474205 
## [77] train-mlogloss:0.472692 
## [78] train-mlogloss:0.471695 
## [79] train-mlogloss:0.469626 
## [80] train-mlogloss:0.468345 
## [81] train-mlogloss:0.466787 
## [82] train-mlogloss:0.465979 
## [83] train-mlogloss:0.464638 
## [84] train-mlogloss:0.462384 
## [85] train-mlogloss:0.460966 
## [86] train-mlogloss:0.459638 
## [87] train-mlogloss:0.458490 
## [88] train-mlogloss:0.457232 
## [89] train-mlogloss:0.455864 
## [90] train-mlogloss:0.454534 
## [91] train-mlogloss:0.453183 
## [92] train-mlogloss:0.452209 
## [93] train-mlogloss:0.450001 
## [94] train-mlogloss:0.449032 
## [95] train-mlogloss:0.448240 
## [96] train-mlogloss:0.446901 
## [97] train-mlogloss:0.445530 
## [98] train-mlogloss:0.443044 
## [99] train-mlogloss:0.440377 
## [100]    train-mlogloss:0.438573
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
##                       Feature         Gain       Cover   Frequency
##  1: Log_production_budget_adj 0.6571360958 0.540024484 0.444935869
##  2:               genre_count 0.0597556999 0.076114894 0.102167183
##  3:               Main_Horror 0.0284012203 0.035320571 0.031107180
##  4:                      Fall 0.0240605515 0.012550804 0.050567595
##  5:                         R 0.0228187565 0.021416049 0.025947221
##  6:               Main_Comedy 0.0217120987 0.014437870 0.027126640
##  7:                    Summer 0.0210226708 0.018750205 0.039510541
##  8:                     PG.13 0.0207653218 0.013231485 0.036856848
##  9:               Main_Action 0.0201254750 0.017748169 0.024620374
## 10:                Main_Drama 0.0184599494 0.014212020 0.019607843
## 11:         between_90_to_135 0.0181312740 0.012659661 0.033466018
## 12:                    Spring 0.0142384274 0.015689948 0.031991744
## 13:              Main_Romance 0.0127818434 0.033868427 0.016659295
## 14:             Main_Thriller 0.0118235010 0.013440066 0.018280997
## 15:                Main_Crime 0.0112919867 0.016242604 0.017838714
## 16:                        PG 0.0079206403 0.007379108 0.014300457
## 17:          Greater_than_135 0.0073522408 0.009674671 0.010025063
## 18:              Main_History 0.0053744942 0.034047648 0.010025063
## 19:            Main_Adventure 0.0053313633 0.013976867 0.011204482
## 20:      Main_Science_Fiction 0.0035834036 0.029034035 0.007961079
## 21:              Main_Mystery 0.0019436315 0.008069030 0.004570249
## 22:                         G 0.0017177299 0.013592940 0.005012531
## 23:              Main_Fantasy 0.0013849323 0.006005137 0.006929087
## 24:               Main_Family 0.0011820761 0.017149667 0.005749668
## 25:          Main_Documentary 0.0009270498 0.002544168 0.001621701
## 26:            Main_Animation 0.0007575660 0.002819470 0.001916556
##                       Feature         Gain       Cover   Frequency
xgb.plot.importance(importance)

The XGBoost model for multi-class classification highlights the feature “Log_production_budget_adj” as the most critical predictor with the highest gain (65.7%), indicating its dominant role in distinguishing revenue categories. Other significant contributors include “genre_count” (5.9%), “Main_Horror” (2.8%), and “Fall” (2.4%). Gain represents the improvement in model accuracy attributed to a feature, while cover measures how often a feature is used in splits. Despite the dominance of “Log_production_budget_adj,” the contributions of genre-related and temporal features suggest revenue prediction is also influenced by qualitative content and seasonal timing. XGBoost’s flexible tree structure captures interactions effectively, offering a robust performance for multi-class categorization. This feature importance plot underscores the multi-dimensional nature of the revenue categories.

Evaluation of XGBoost Model

# Prepare test data matrix
x_test <- model.matrix(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = test_data
)[, -1]

# Encode test labels as numeric (0-based)
y_test <- as.numeric(test_data$Log_Worldwide_Gross_Category) - 1

# Predict probabilities for test data
pred_probs <- predict(xgb_model, newdata = x_test)

# Reshape predicted probabilities into a matrix
pred_matrix <- matrix(pred_probs, nrow = nrow(x_test), byrow = TRUE)

# Get predicted classes (1-based indexing)
pred_classes <- max.col(pred_matrix) - 1  # Convert from 1-based to 0-based indexing for compatibility

# Convert to factor using the original levels of the dependent variable
test_data$predicted_categories <- factor(pred_classes, labels = levels(test_data$Log_Worldwide_Gross_Category))

cat("Rows in predicted categories:", length(test_data$predicted_categories), "\n")
## Rows in predicted categories: 356
cat("Rows in test data:", nrow(test_data), "\n")
## Rows in test data: 356
library(caret)
confusion_matrix <- confusionMatrix(
  data = test_data$predicted_categories,
  reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     88     6     30
##     Low's       6    72     26
##     Medium     29    36     63
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6264          
##                  95% CI : (0.5739, 0.6768)
##     No Information Rate : 0.3455          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.439           
##                                           
##  Mcnemar's Test P-Value : 0.6526          
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.7154       0.6316        0.5294
## Specificity                 0.8455       0.8678        0.7257
## Pos Pred Value              0.7097       0.6923        0.4922
## Neg Pred Value              0.8491       0.8333        0.7544
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2472       0.2022        0.1770
## Detection Prevalence        0.3483       0.2921        0.3596
## Balanced Accuracy           0.7805       0.7497        0.6276
# ROC Curve and AUC for each class
library(pROC)

# Initialize lists to store results
roc_list <- list()
auc_list <- list()

# Iterate through categories
for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
  
  # Extract predicted probabilities for the current category
  predicted_probs <- pred_matrix[, which(categories == category)]
  
  # ROC Curve
  roc_obj <- roc(true_binary, predicted_probs)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC Curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8632018
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Low's : 0.8365412
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Medium : 0.6993937
# Lift Chart for each category
library(dplyr)

for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(pred_matrix[, which(categories == category)]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(pred_matrix[, which(categories == category)], 10)
  
  # Calculate Lift
  lift_table <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Log_Worldwide_Gross_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
    )
  
  # Plot Lift Chart
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference line
}

The XGBoost model’s performance for predicting the Log_Worldwide_Gross_Category on the test data is moderately successful, with an overall accuracy of 62.64% and a Kappa statistic of 0.439, reflecting moderate agreement between predicted and actual categories. Here’s a breakdown of its key performance indicators and observations:

Overall Performance:

The model correctly classified 62.64% of the test instances, exceeding the no-information rate of 34.55%. The Kappa value of 0.439 indicates a reasonable level of agreement between predictions and actual categories after accounting for random chance.

Class-Specific Metrics:

“High’s” Class: Sensitivity: 71.54% – the model correctly identified 71.54% of the true “High’s”. Specificity: 84.55% – it correctly rejected 84.55% of non-“High’s”. AUC: 0.863 – excellent performance in distinguishing “High’s” from other categories.

“Low’s” Class: Sensitivity: 63.16% – the model identified 63.16% of the true “Low’s”. Specificity: 86.78% – strong at rejecting non-“Low’s”. AUC: 0.837 – good discriminative power for this class.

“Medium” Class: Sensitivity: 52.94% – weakest performance in identifying “Medium”. Specificity: 72.57% – moderate ability to correctly reject non-“Medium”. AUC: 0.699 – fair performance but less effective than for the other two classes.

Feature Importance:

The most influential feature is Log_production_budget_adj, contributing 65.71% gain, indicating that production budgets are a significant determinant of a movie’s gross category. genre_count ranks second with a gain of 5.98%, showing that the number of genres in a movie also impacts its gross category. Other notable features include Main_Horror, Fall, and R, though their contributions are considerably smaller. Confusion Matrix Insights:

The model performed best on the “High’s” category, correctly classifying 88 out of 123 instances. Misclassifications are most frequent in the “Medium” category, where 65 instances were incorrectly labeled as “High’s” or “Low’s”.

Lift Charts:

The lift charts for all categories show that the model’s predictions do not significantly exceed random prediction in higher deciles. This indicates that while the model performs moderately well, it has limited power in prioritizing instances with high probabilities of being correctly classified.

polynonial

# Fit a polynomial logistic regression model
library(nnet)

polynomial_logistic_model <- multinom(
  Log_Worldwide_Gross_Category ~ poly(Log_production_budget_adj, degree = 2) +
    PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
    Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
    Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
    Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + 
    Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights:  87 (56 variable)
## initial  value 909.650975 
## iter  10 value 798.718851
## iter  20 value 708.026565
## iter  30 value 652.817268
## iter  40 value 637.902038
## iter  50 value 634.820394
## iter  60 value 634.569618
## iter  70 value 634.561714
## final  value 634.555551 
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = Log_Worldwide_Gross_Category ~ poly(Log_production_budget_adj, 
##     degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_History + Main_History + Main_Romance + Main_Science_Fiction + 
##     Main_Thriller, data = train_data, maxit = 1000)
## 
## Coefficients:
##        (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Low's     7.057289                                    -75.27698
## Medium   -1.896867                                    -31.67634
##        poly(Log_production_budget_adj, degree = 2)2     PG.13         R
## Low's                                     -29.28044 -5.154952 -4.476388
## Medium                                    -26.87965  3.349820  3.573899
##               PG          G between_90_to_135 Greater_than_135     Spring
## Low's  -5.630926 -12.086795         0.2828976      -0.02843191  0.2644157
## Medium  2.777515   3.595399        -0.1413354      -0.19822236 -0.1570733
##            Summer       Fall genre_count Main_Action Main_Adventure
## Low's  -0.2203033 -0.1295543   0.2275012   -3.132174      -1.861444
## Medium -0.1345413 -0.1799153   0.2441743   -1.583681      -1.012582
##        Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Low's       -2.309754   -3.061335  -2.637638       -2.0951053  -2.681637
## Medium      -1.498046   -1.359556  -1.155372       -0.9881132  -1.430312
##        Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Low's   -3.1743580    -3.478602   -5.301919    -4.726698    -1.668444
## Medium  -0.7303407    -1.500829   -2.329913    -1.441521   -10.235846
##        Main_Romance Main_Science_Fiction Main_Thriller
## Low's     -2.066832           -3.5918702     -3.479934
## Medium    -0.108060           -0.6376244     -1.762021
## 
## Std. Errors:
##        (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Low's    31.440511                                     6.542746
## Medium    7.968938                                     5.940044
##        poly(Log_production_budget_adj, degree = 2)2     PG.13         R
## Low's                                      4.779280 31.402599 31.401985
## Medium                                     5.373912  7.892837  7.893843
##               PG         G between_90_to_135 Greater_than_135    Spring
## Low's  31.404980 41.126023         0.4501671        0.7233987 0.3589477
## Medium  7.893714  7.937537         0.3784103        0.5563157 0.2959257
##           Summer      Fall genre_count Main_Action Main_Adventure
## Low's  0.3680146 0.3394367   0.1364965    1.508249       1.612416
## Medium 0.2919421 0.2773746   0.1122283    1.416612       1.466495
##        Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Low's        1.806828    1.513753   1.564187         2.400066   1.504440
## Medium       1.568365    1.426653   1.466844         2.216188   1.421635
##        Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Low's     2.016163     1.988465    1.577897     2.025014     2.159766
## Medium    1.662983     1.588028    1.468938     1.632632    63.086958
##        Main_Romance Main_Science_Fiction Main_Thriller
## Low's      1.708711             1.839572      1.556529
## Medium     1.599106             1.608725      1.463149
## 
## Residual Deviance: 1269.111 
## AIC: 1381.111

The polynomial logistic regression model successfully introduced a second-degree term for the Log_production_budget_adj variable to account for potential nonlinear relationships between predictors and the target variable.

Model Fit The model converged after 70 iterations, achieving a residual deviance of 1269.111 and an AIC of 1381.111. These values indicate a reasonable fit, with reduced deviance compared to linear models, suggesting that the inclusion of the polynomial term captures additional variance.

Key Predictors

Log_production_budget_adj (Polynomial Term):

The first-degree polynomial term is significantly negative for “Low’s” (-75.28) and “Medium” (-31.68), indicating that increasing production budget reduces the likelihood of these categories.

The second-degree term is also negative but smaller in magnitude, confirming a diminishing nonlinear effect of the budget on the likelihood of belonging to “Low’s” and “Medium.”

Categorical Variables:

The MPAA rating categories (PG.13, R, PG, G) have consistent impacts across the outcome categories. For instance, the coefficients for PG.13 show a strong negative impact on “Low’s” (-5.15) while being moderately positive for “Medium” (3.35). Genre-based variables like Main_Comedy, Main_Horror, and Main_Action show notable negative effects on “Low’s” and smaller effects for “Medium.”

Seasonal Variables:

Features like Spring and Fall have small coefficients, indicating limited impact on classification outcomes.

Evaluation of Polynomial Logistic Regression

# Predict categories on the test data
test_data$predicted_categories <- predict(polynomial_logistic_model, newdata = test_data)

# Predict probabilities for each class
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")

# Confusion Matrix
library(caret)

confusion_matrix <- confusionMatrix(
  data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
  reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     79     4     29
##     Low's      10    80     29
##     Medium     34    30     61
## 
## Overall Statistics
##                                           
##                Accuracy : 0.618           
##                  95% CI : (0.5653, 0.6687)
##     No Information Rate : 0.3455          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4272          
##                                           
##  Mcnemar's Test P-Value : 0.3939          
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.6423       0.7018        0.5126
## Specificity                 0.8584       0.8388        0.7300
## Pos Pred Value              0.7054       0.6723        0.4880
## Neg Pred Value              0.8197       0.8565        0.7489
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2219       0.2247        0.1713
## Detection Prevalence        0.3146       0.3343        0.3511
## Balanced Accuracy           0.7503       0.7703        0.6213
library(pROC)

# ROC Curve and AUC for each class
roc_list <- list()
auc_list <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)

for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
  predicted_probs <- predicted_probabilities[, category]
  
  # ROC Curve
  roc_obj <- roc(true_binary, predicted_probs)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC Curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2)
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8523326
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Low's : 0.8615884
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Medium : 0.7074957
# Lift Chart for each category
library(dplyr)

for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predicted_probabilities[, which(categories == category)]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(predicted_probabilities[, which(categories == category)], 10)
  
  # Calculate Lift
  lift_table <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Log_Worldwide_Gross_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
    )
  
  # Plot Lift Chart
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference line
}

The polynomial logistic regression model was evaluated on test data, and its performance metrics were captured through a confusion matrix, ROC curves, and a lift chart for the “Good” category.

Confusion Matrix and Classification Metrics

Overall Accuracy: The model achieved an accuracy of 45.51%, which is close to the No Information Rate (NIR) of 45.79%, indicating that the model struggles to outperform random guessing for the majority class.

Kappa Statistic: The kappa value of 0.1212 suggests weak agreement between the predicted and actual classes beyond chance. McNemar’s Test: The p-value of 0.02813 indicates significant differences in classification errors for paired observations, suggesting the model’s misclassification patterns are not random.

Performance by Class:

Excellent:

Sensitivity: 33.78%, indicating the model identifies only about a third of actual “Excellent” instances correctly. Specificity: 85.11%, suggesting the model is good at identifying instances not belonging to this category. AUC: 0.6812, which reflects moderate discriminative ability for the “Excellent” class.

Good:

Sensitivity: 57.06%, showing the model is better at identifying “Good” instances compared to the other classes. Specificity: 44.56%, highlighting challenges in distinguishing “Good” from other classes. AUC: 0.5363, close to random guessing, showing weak predictive ability for “Good.”

Poor:

Sensitivity: 36.97%, indicating limited ability to correctly classify “Poor” instances. Specificity: 81.01%, meaning the model can exclude instances not belonging to “Poor” relatively well. AUC: 0.6851, slightly better but still moderate performance.

Decision tree

# Load required libraries
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.2
# Train Decision Tree for Classification
dt_model_categorical <- rpart(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "class", # Classification tree
  control = rpart.control(
    cp = 0.003,      # Smaller complexity parameter for more splits
    maxdepth = 10,   # Allow deeper trees
    minsplit = 10    # Minimum observations required to split
  )
)

# Plot the decision tree
rpart.plot(
  dt_model_categorical,
  type = 3,             # Show splits and probabilities
  extra = 101,          # Display n, % observations, and class probabilities
  under = TRUE,         # Show text under the nodes
  fallen.leaves = TRUE, # Spread the leaves horizontally
  box.palette = "Blues" # Color scheme for the boxes
)

# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = Log_Worldwide_Gross_Category ~ Log_production_budget_adj + 
##     PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, 
##     method = "class", control = rpart.control(cp = 0.003, maxdepth = 10, 
##         minsplit = 10))
##   n= 828 
## 
##            CP nsplit rel error    xerror       xstd
## 1 0.323583181      0 1.0000000 1.0530165 0.02420522
## 2 0.109689214      1 0.6764168 0.6800731 0.02616685
## 3 0.011578306      2 0.5667276 0.6581353 0.02607784
## 4 0.006398537      5 0.5319927 0.5850091 0.02561559
## 5 0.005484461      7 0.5191956 0.5813528 0.02558563
## 6 0.003656307      8 0.5137112 0.5868373 0.02563032
## 7 0.003046923     19 0.4716636 0.6087751 0.02579421
## 8 0.003000000     23 0.4570384 0.6270567 0.02591287
## 
## Variable importance
## Log_production_budget_adj               genre_count               Main_Action 
##                        69                         7                         5 
##            Main_Adventure                        PG                Main_Drama 
##                         4                         3                         2 
##                      Fall               Main_Comedy               Main_Horror 
##                         2                         2                         2 
##            Main_Animation                         R                     PG.13 
##                         1                         1                         1 
## 
## Node number 1: 828 observations,    complexity param=0.3235832
##   predicted class=Low's   expected loss=0.660628  P(node) =1
##     class counts:   271   281   276
##    probabilities: 0.327 0.339 0.333 
##   left son=2 (282 obs) right son=3 (546 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.47917 to the right, improve=93.390790, (0 missing)
##       R                         < 0.5      to the left,  improve=17.560580, (0 missing)
##       Main_Action               < 0.5      to the right, improve=11.879580, (0 missing)
##       Main_Drama                < 0.5      to the left,  improve=10.839130, (0 missing)
##       PG                        < 0.5      to the right, improve= 8.994799, (0 missing)
##   Surrogate splits:
##       Main_Action    < 0.5      to the right, agree=0.693, adj=0.099, (0 split)
##       Main_Adventure < 0.5      to the right, agree=0.690, adj=0.089, (0 split)
##       genre_count    < 3.5      to the right, agree=0.680, adj=0.060, (0 split)
##       PG             < 0.5      to the right, agree=0.679, adj=0.057, (0 split)
##       Main_Animation < 0.5      to the right, agree=0.670, adj=0.032, (0 split)
## 
## Node number 2: 282 observations,    complexity param=0.003656307
##   predicted class=High's  expected loss=0.3049645  P(node) =0.3405797
##     class counts:   196    19    67
##    probabilities: 0.695 0.067 0.238 
##   left son=4 (110 obs) right son=5 (172 obs)
##   Primary splits:
##       Log_production_budget_adj < 18.19916 to the right, improve=12.389060, (0 missing)
##       genre_count               < 5.5      to the left,  improve= 4.337058, (0 missing)
##       R                         < 0.5      to the left,  improve= 2.627660, (0 missing)
##       Main_Romance              < 0.5      to the left,  improve= 2.005515, (0 missing)
##       PG                        < 0.5      to the right, improve= 1.586866, (0 missing)
##   Surrogate splits:
##       Main_Adventure < 0.5      to the right, agree=0.660, adj=0.127, (0 split)
##       PG             < 0.5      to the right, agree=0.635, adj=0.064, (0 split)
##       genre_count    < 3.5      to the right, agree=0.631, adj=0.055, (0 split)
##       G              < 0.5      to the right, agree=0.621, adj=0.027, (0 split)
##       Main_Fantasy   < 0.5      to the right, agree=0.621, adj=0.027, (0 split)
## 
## Node number 3: 546 observations,    complexity param=0.1096892
##   predicted class=Low's   expected loss=0.5201465  P(node) =0.6594203
##     class counts:    75   262   209
##    probabilities: 0.137 0.480 0.383 
##   left son=6 (188 obs) right son=7 (358 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.0469  to the left,  improve=39.860070, (0 missing)
##       R                         < 0.5      to the right, improve= 5.528132, (0 missing)
##       Main_Drama                < 0.5      to the right, improve= 4.601632, (0 missing)
##       PG.13                     < 0.5      to the left,  improve= 2.926376, (0 missing)
##       Main_Action               < 0.5      to the left,  improve= 1.834008, (0 missing)
##   Surrogate splits:
##       Main_Horror      < 0.5      to the right, agree=0.672, adj=0.048, (0 split)
##       Main_Documentary < 0.5      to the right, agree=0.659, adj=0.011, (0 split)
##       Main_Fantasy     < 0.5      to the right, agree=0.658, adj=0.005, (0 split)
## 
## Node number 4: 110 observations
##   predicted class=High's  expected loss=0.1  P(node) =0.1328502
##     class counts:    99     2     9
##    probabilities: 0.900 0.018 0.082 
## 
## Node number 5: 172 observations,    complexity param=0.003656307
##   predicted class=High's  expected loss=0.4360465  P(node) =0.2077295
##     class counts:    97    17    58
##    probabilities: 0.564 0.099 0.337 
##   left son=10 (168 obs) right son=11 (4 obs)
##   Primary splits:
##       genre_count               < 5.5      to the left,  improve=3.1414730, (0 missing)
##       Log_production_budget_adj < 17.90181 to the right, improve=2.1174260, (0 missing)
##       Main_Romance              < 0.5      to the left,  improve=1.0904750, (0 missing)
##       R                         < 0.5      to the left,  improve=0.8355518, (0 missing)
##       PG                        < 0.5      to the right, improve=0.6807246, (0 missing)
## 
## Node number 6: 188 observations
##   predicted class=Low's   expected loss=0.2234043  P(node) =0.2270531
##     class counts:     9   146    33
##    probabilities: 0.048 0.777 0.176 
## 
## Node number 7: 358 observations,    complexity param=0.01157831
##   predicted class=Medium  expected loss=0.5083799  P(node) =0.4323671
##     class counts:    66   116   176
##    probabilities: 0.184 0.324 0.492 
##   left son=14 (186 obs) right son=15 (172 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.89341 to the left,  improve=8.885337, (0 missing)
##       R                         < 0.5      to the right, improve=2.435754, (0 missing)
##       Main_Horror               < 0.5      to the right, improve=1.459841, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=1.358856, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=1.156377, (0 missing)
##   Surrogate splits:
##       Main_Action      < 0.5      to the left,  agree=0.601, adj=0.169, (0 split)
##       genre_count      < 2.5      to the left,  agree=0.550, adj=0.064, (0 split)
##       Main_Drama       < 0.5      to the right, agree=0.550, adj=0.064, (0 split)
##       Greater_than_135 < 0.5      to the left,  agree=0.539, adj=0.041, (0 split)
##       Spring           < 0.5      to the left,  agree=0.528, adj=0.017, (0 split)
## 
## Node number 10: 168 observations,    complexity param=0.003656307
##   predicted class=High's  expected loss=0.422619  P(node) =0.2028986
##     class counts:    97    17    54
##    probabilities: 0.577 0.101 0.321 
##   left son=20 (57 obs) right son=21 (111 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.90181 to the right, improve=2.0337840, (0 missing)
##       PG                        < 0.5      to the right, improve=1.2824840, (0 missing)
##       R                         < 0.5      to the left,  improve=1.0190320, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.7738095, (0 missing)
##       Main_Science_Fiction      < 0.5      to the left,  improve=0.5772727, (0 missing)
##   Surrogate splits:
##       Main_Adventure < 0.5      to the right, agree=0.679, adj=0.053, (0 split)
##       genre_count    < 3.5      to the right, agree=0.673, adj=0.035, (0 split)
##       Main_Mystery   < 0.5      to the right, agree=0.667, adj=0.018, (0 split)
## 
## Node number 11: 4 observations
##   predicted class=Medium  expected loss=0  P(node) =0.004830918
##     class counts:     0     0     4
##    probabilities: 0.000 0.000 1.000 
## 
## Node number 14: 186 observations,    complexity param=0.01157831
##   predicted class=Low's   expected loss=0.5537634  P(node) =0.2246377
##     class counts:    26    83    77
##    probabilities: 0.140 0.446 0.414 
##   left son=28 (14 obs) right son=29 (172 obs)
##   Primary splits:
##       Main_Horror  < 0.5      to the right, improve=2.220716, (0 missing)
##       R            < 0.5      to the right, improve=2.141954, (0 missing)
##       Main_Romance < 0.5      to the left,  improve=1.807414, (0 missing)
##       Main_Mystery < 0.5      to the left,  improve=1.713908, (0 missing)
##       Main_Action  < 0.5      to the right, improve=1.073431, (0 missing)
## 
## Node number 15: 172 observations
##   predicted class=Medium  expected loss=0.4244186  P(node) =0.2077295
##     class counts:    40    33    99
##    probabilities: 0.233 0.192 0.576 
## 
## Node number 20: 57 observations,    complexity param=0.003656307
##   predicted class=High's  expected loss=0.2982456  P(node) =0.06884058
##     class counts:    40     3    14
##    probabilities: 0.702 0.053 0.246 
##   left son=40 (45 obs) right son=41 (12 obs)
##   Primary splits:
##       Main_Comedy       < 0.5      to the left,  improve=3.011111, (0 missing)
##       Spring            < 0.5      to the right, improve=1.843972, (0 missing)
##       between_90_to_135 < 0.5      to the left,  improve=1.428195, (0 missing)
##       PG                < 0.5      to the right, improve=1.428195, (0 missing)
##       genre_count       < 1.5      to the right, improve=1.202564, (0 missing)
##   Surrogate splits:
##       genre_count < 1.5      to the right, agree=0.877, adj=0.417, (0 split)
## 
## Node number 21: 111 observations,    complexity param=0.003656307
##   predicted class=High's  expected loss=0.4864865  P(node) =0.134058
##     class counts:    57    14    40
##    probabilities: 0.514 0.126 0.360 
##   left son=42 (97 obs) right son=43 (14 obs)
##   Primary splits:
##       genre_count               < 3.5      to the left,  improve=2.2726720, (0 missing)
##       Main_Adventure            < 0.5      to the left,  improve=1.2347350, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.0828830, (0 missing)
##       Main_Comedy               < 0.5      to the right, improve=1.0193910, (0 missing)
##       Log_production_budget_adj < 17.55139 to the left,  improve=0.7932705, (0 missing)
## 
## Node number 28: 14 observations,    complexity param=0.005484461
##   predicted class=Medium  expected loss=0.5  P(node) =0.01690821
##     class counts:     5     2     7
##    probabilities: 0.357 0.143 0.500 
##   left son=56 (7 obs) right son=57 (7 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.34233 to the right, improve=2.7142860, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=0.6785714, (0 missing)
##       R                         < 0.5      to the right, improve=0.6785714, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=0.6785714, (0 missing)
##       Summer                    < 0.5      to the right, improve=0.5119048, (0 missing)
##   Surrogate splits:
##       between_90_to_135 < 0.5      to the left,  agree=0.643, adj=0.286, (0 split)
##       Fall              < 0.5      to the left,  agree=0.571, adj=0.143, (0 split)
## 
## Node number 29: 172 observations,    complexity param=0.01157831
##   predicted class=Low's   expected loss=0.5290698  P(node) =0.2077295
##     class counts:    21    81    70
##    probabilities: 0.122 0.471 0.407 
##   left son=58 (87 obs) right son=59 (85 obs)
##   Primary splits:
##       R                         < 0.5      to the right, improve=2.339310, (0 missing)
##       Main_Romance              < 0.5      to the left,  improve=1.968793, (0 missing)
##       Main_Mystery              < 0.5      to the left,  improve=1.796408, (0 missing)
##       Log_production_budget_adj < 16.69612 to the left,  improve=1.234713, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=1.141605, (0 missing)
##   Surrogate splits:
##       PG.13                     < 0.5      to the left,  agree=0.890, adj=0.776, (0 split)
##       PG                        < 0.5      to the left,  agree=0.599, adj=0.188, (0 split)
##       Log_production_budget_adj < 16.64124 to the left,  agree=0.581, adj=0.153, (0 split)
##       Spring                    < 0.5      to the left,  agree=0.564, adj=0.118, (0 split)
##       Fall                      < 0.5      to the right, agree=0.558, adj=0.106, (0 split)
## 
## Node number 40: 45 observations
##   predicted class=High's  expected loss=0.2222222  P(node) =0.05434783
##     class counts:    35     3     7
##    probabilities: 0.778 0.067 0.156 
## 
## Node number 41: 12 observations,    complexity param=0.003656307
##   predicted class=Medium  expected loss=0.4166667  P(node) =0.01449275
##     class counts:     5     0     7
##    probabilities: 0.417 0.000 0.583 
##   left son=82 (8 obs) right son=83 (4 obs)
##   Primary splits:
##       Log_production_budget_adj < 18.138   to the left,  improve=2.083333000, (0 missing)
##       Summer                    < 0.5      to the left,  improve=2.083333000, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=0.166666700, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.055555560, (0 missing)
##       R                         < 0.5      to the left,  improve=0.004761905, (0 missing)
## 
## Node number 42: 97 observations,    complexity param=0.003046923
##   predicted class=High's  expected loss=0.443299  P(node) =0.1171498
##     class counts:    54    11    32
##    probabilities: 0.557 0.113 0.330 
##   left son=84 (20 obs) right son=85 (77 obs)
##   Primary splits:
##       Main_Comedy               < 0.5      to the right, improve=0.9470076, (0 missing)
##       Log_production_budget_adj < 17.56539 to the left,  improve=0.7965206, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.6964138, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=0.6771579, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=0.5467995, (0 missing)
##   Surrogate splits:
##       genre_count < 1.5      to the left,  agree=0.814, adj=0.1, (0 split)
## 
## Node number 43: 14 observations
##   predicted class=Medium  expected loss=0.4285714  P(node) =0.01690821
##     class counts:     3     3     8
##    probabilities: 0.214 0.214 0.571 
## 
## Node number 56: 7 observations
##   predicted class=High's  expected loss=0.4285714  P(node) =0.008454106
##     class counts:     4     2     1
##    probabilities: 0.571 0.286 0.143 
## 
## Node number 57: 7 observations
##   predicted class=Medium  expected loss=0.1428571  P(node) =0.008454106
##     class counts:     1     0     6
##    probabilities: 0.143 0.000 0.857 
## 
## Node number 58: 87 observations,    complexity param=0.003656307
##   predicted class=Low's   expected loss=0.4367816  P(node) =0.1050725
##     class counts:     8    49    30
##    probabilities: 0.092 0.563 0.345 
##   left son=116 (51 obs) right son=117 (36 obs)
##   Primary splits:
##       Fall                      < 0.5      to the left,  improve=2.4264140, (0 missing)
##       Spring                    < 0.5      to the right, improve=1.5136200, (0 missing)
##       Summer                    < 0.5      to the right, improve=1.0899550, (0 missing)
##       Log_production_budget_adj < 16.40027 to the left,  improve=0.5339603, (0 missing)
##       genre_count               < 2.5      to the right, improve=0.5277214, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 16.52246 to the left,  agree=0.644, adj=0.139, (0 split)
##       Summer                    < 0.5      to the right, agree=0.621, adj=0.083, (0 split)
##       Main_Drama                < 0.5      to the left,  agree=0.609, adj=0.056, (0 split)
##       Main_Adventure            < 0.5      to the left,  agree=0.598, adj=0.028, (0 split)
## 
## Node number 59: 85 observations,    complexity param=0.006398537
##   predicted class=Medium  expected loss=0.5294118  P(node) =0.102657
##     class counts:    13    32    40
##    probabilities: 0.153 0.376 0.471 
##   left son=118 (56 obs) right son=119 (29 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.69612 to the left,  improve=1.4084180, (0 missing)
##       Main_Mystery              < 0.5      to the left,  improve=1.3850790, (0 missing)
##       Main_Romance              < 0.5      to the left,  improve=0.9621641, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=0.8483837, (0 missing)
##       genre_count               < 1.5      to the left,  improve=0.8176471, (0 missing)
##   Surrogate splits:
##       PG                < 0.5      to the left,  agree=0.682, adj=0.069, (0 split)
##       Main_Adventure    < 0.5      to the left,  agree=0.682, adj=0.069, (0 split)
##       between_90_to_135 < 0.5      to the right, agree=0.671, adj=0.034, (0 split)
## 
## Node number 82: 8 observations
##   predicted class=High's  expected loss=0.375  P(node) =0.009661836
##     class counts:     5     0     3
##    probabilities: 0.625 0.000 0.375 
## 
## Node number 83: 4 observations
##   predicted class=Medium  expected loss=0  P(node) =0.004830918
##     class counts:     0     0     4
##    probabilities: 0.000 0.000 1.000 
## 
## Node number 84: 20 observations
##   predicted class=High's  expected loss=0.3  P(node) =0.02415459
##     class counts:    14     2     4
##    probabilities: 0.700 0.100 0.200 
## 
## Node number 85: 77 observations,    complexity param=0.003046923
##   predicted class=High's  expected loss=0.4805195  P(node) =0.09299517
##     class counts:    40     9    28
##    probabilities: 0.519 0.117 0.364 
##   left son=170 (24 obs) right son=171 (53 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.74953 to the right, improve=1.1332390, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=0.7355762, (0 missing)
##       Main_Animation            < 0.5      to the left,  improve=0.4374634, (0 missing)
##       Main_Science_Fiction      < 0.5      to the left,  improve=0.4374634, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=0.4306027, (0 missing)
##   Surrogate splits:
##       Main_Thriller        < 0.5      to the right, agree=0.727, adj=0.125, (0 split)
##       Main_Animation       < 0.5      to the right, agree=0.701, adj=0.042, (0 split)
##       Main_Science_Fiction < 0.5      to the right, agree=0.701, adj=0.042, (0 split)
## 
## Node number 116: 51 observations
##   predicted class=Low's   expected loss=0.3529412  P(node) =0.0615942
##     class counts:     6    33    12
##    probabilities: 0.118 0.647 0.235 
## 
## Node number 117: 36 observations,    complexity param=0.003656307
##   predicted class=Medium  expected loss=0.5  P(node) =0.04347826
##     class counts:     2    16    18
##    probabilities: 0.056 0.444 0.500 
##   left son=234 (3 obs) right son=235 (33 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.13019 to the left,  improve=1.2323230, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=0.8527778, (0 missing)
##       genre_count               < 1.5      to the left,  improve=0.5777778, (0 missing)
##       Main_Thriller             < 0.5      to the right, improve=0.3402778, (0 missing)
##       Main_Action               < 0.5      to the right, improve=0.2626263, (0 missing)
## 
## Node number 118: 56 observations,    complexity param=0.006398537
##   predicted class=Low's   expected loss=0.5535714  P(node) =0.06763285
##     class counts:     8    25    23
##    probabilities: 0.143 0.446 0.411 
##   left son=236 (24 obs) right son=237 (32 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the right, improve=1.6458330, (0 missing)
##       genre_count               < 3.5      to the right, improve=1.2884620, (0 missing)
##       Log_production_budget_adj < 16.17715 to the right, improve=1.0702020, (0 missing)
##       Main_Romance              < 0.5      to the left,  improve=0.9544025, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=0.8034591, (0 missing)
##   Surrogate splits:
##       Main_Comedy               < 0.5      to the left,  agree=0.714, adj=0.333, (0 split)
##       genre_count               < 2.5      to the left,  agree=0.661, adj=0.208, (0 split)
##       Log_production_budget_adj < 16.59911 to the right, agree=0.589, adj=0.042, (0 split)
## 
## Node number 119: 29 observations
##   predicted class=Medium  expected loss=0.4137931  P(node) =0.03502415
##     class counts:     5     7    17
##    probabilities: 0.172 0.241 0.586 
## 
## Node number 170: 24 observations
##   predicted class=High's  expected loss=0.3333333  P(node) =0.02898551
##     class counts:    16     1     7
##    probabilities: 0.667 0.042 0.292 
## 
## Node number 171: 53 observations,    complexity param=0.003046923
##   predicted class=High's  expected loss=0.5471698  P(node) =0.06400966
##     class counts:    24     8    21
##    probabilities: 0.453 0.151 0.396 
##   left son=342 (20 obs) right son=343 (33 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.55589 to the left,  improve=2.0189250, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=1.9834840, (0 missing)
##       Main_Horror               < 0.5      to the right, improve=1.5237740, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=0.8871069, (0 missing)
##       genre_count               < 1.5      to the left,  improve=0.5731613, (0 missing)
##   Surrogate splits:
##       Main_Romance < 0.5      to the right, agree=0.66, adj=0.1, (0 split)
## 
## Node number 234: 3 observations
##   predicted class=Low's   expected loss=0.3333333  P(node) =0.003623188
##     class counts:     1     2     0
##    probabilities: 0.333 0.667 0.000 
## 
## Node number 235: 33 observations,    complexity param=0.003656307
##   predicted class=Medium  expected loss=0.4545455  P(node) =0.03985507
##     class counts:     1    14    18
##    probabilities: 0.030 0.424 0.545 
##   left son=470 (15 obs) right son=471 (18 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the right, improve=0.9454545, (0 missing)
##       Log_production_budget_adj < 16.54416 to the right, improve=0.7736597, (0 missing)
##       genre_count               < 1.5      to the left,  improve=0.7676768, (0 missing)
##       Main_Comedy               < 0.5      to the left,  improve=0.4319014, (0 missing)
##       Main_Action               < 0.5      to the right, improve=0.3454545, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 16.62341 to the right, agree=0.667, adj=0.267, (0 split)
##       Main_Comedy               < 0.5      to the left,  agree=0.667, adj=0.267, (0 split)
##       Greater_than_135          < 0.5      to the right, agree=0.606, adj=0.133, (0 split)
##       genre_count               < 1.5      to the left,  agree=0.606, adj=0.133, (0 split)
##       between_90_to_135         < 0.5      to the left,  agree=0.576, adj=0.067, (0 split)
## 
## Node number 236: 24 observations,    complexity param=0.003656307
##   predicted class=Low's   expected loss=0.4583333  P(node) =0.02898551
##     class counts:     5    13     6
##    probabilities: 0.208 0.542 0.250 
##   left son=472 (18 obs) right son=473 (6 obs)
##   Primary splits:
##       Fall                      < 0.5      to the left,  improve=1.7500000, (0 missing)
##       Log_production_budget_adj < 16.1696  to the left,  improve=1.0833330, (0 missing)
##       Summer                    < 0.5      to the left,  improve=1.0833330, (0 missing)
##       Spring                    < 0.5      to the right, improve=0.7191877, (0 missing)
##       genre_count               < 1.5      to the left,  improve=0.2916667, (0 missing)
## 
## Node number 237: 32 observations,    complexity param=0.003656307
##   predicted class=Medium  expected loss=0.46875  P(node) =0.03864734
##     class counts:     3    12    17
##    probabilities: 0.094 0.375 0.531 
##   left son=474 (3 obs) right son=475 (29 obs)
##   Primary splits:
##       genre_count               < 3.5      to the right, improve=1.4058910, (0 missing)
##       Log_production_budget_adj < 16.17023 to the right, improve=1.0592950, (0 missing)
##       Main_Comedy               < 0.5      to the left,  improve=0.8125000, (0 missing)
##       Main_Romance              < 0.5      to the right, improve=0.7162356, (0 missing)
##       Main_Thriller             < 0.5      to the right, improve=0.4403736, (0 missing)
## 
## Node number 342: 20 observations
##   predicted class=High's  expected loss=0.35  P(node) =0.02415459
##     class counts:    13     2     5
##    probabilities: 0.650 0.100 0.250 
## 
## Node number 343: 33 observations,    complexity param=0.003046923
##   predicted class=Medium  expected loss=0.5151515  P(node) =0.03985507
##     class counts:    11     6    16
##    probabilities: 0.333 0.182 0.485 
##   left son=686 (13 obs) right son=687 (20 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the right, improve=2.0771560, (0 missing)
##       Log_production_budget_adj < 17.57828 to the right, improve=0.8035298, (0 missing)
##       Summer                    < 0.5      to the left,  improve=0.8035298, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.7127897, (0 missing)
##       Main_Action               < 0.5      to the left,  improve=0.5757576, (0 missing)
##   Surrogate splits:
##       genre_count      < 2.5      to the left,  agree=0.727, adj=0.308, (0 split)
##       Main_Action      < 0.5      to the left,  agree=0.727, adj=0.308, (0 split)
##       PG.13            < 0.5      to the right, agree=0.697, adj=0.231, (0 split)
##       Greater_than_135 < 0.5      to the right, agree=0.697, adj=0.231, (0 split)
##       Fall             < 0.5      to the right, agree=0.697, adj=0.231, (0 split)
## 
## Node number 470: 15 observations
##   predicted class=Low's   expected loss=0.4666667  P(node) =0.01811594
##     class counts:     1     8     6
##    probabilities: 0.067 0.533 0.400 
## 
## Node number 471: 18 observations
##   predicted class=Medium  expected loss=0.3333333  P(node) =0.02173913
##     class counts:     0     6    12
##    probabilities: 0.000 0.333 0.667 
## 
## Node number 472: 18 observations
##   predicted class=Low's   expected loss=0.3333333  P(node) =0.02173913
##     class counts:     3    12     3
##    probabilities: 0.167 0.667 0.167 
## 
## Node number 473: 6 observations
##   predicted class=Medium  expected loss=0.5  P(node) =0.007246377
##     class counts:     2     1     3
##    probabilities: 0.333 0.167 0.500 
## 
## Node number 474: 3 observations
##   predicted class=Low's   expected loss=0.3333333  P(node) =0.003623188
##     class counts:     1     2     0
##    probabilities: 0.333 0.667 0.000 
## 
## Node number 475: 29 observations
##   predicted class=Medium  expected loss=0.4137931  P(node) =0.03502415
##     class counts:     2    10    17
##    probabilities: 0.069 0.345 0.586 
## 
## Node number 686: 13 observations
##   predicted class=High's  expected loss=0.5384615  P(node) =0.01570048
##     class counts:     6     4     3
##    probabilities: 0.462 0.308 0.231 
## 
## Node number 687: 20 observations
##   predicted class=Medium  expected loss=0.35  P(node) =0.02415459
##     class counts:     5     2    13
##    probabilities: 0.250 0.100 0.650

The decision tree is a classification model that predicts the Log_Worldwide_Gross_Category (categorized into classes such as “Low’s,” “Medium,” and “High’s”) based on several predictor variables, including Log_production_budget_adj, ratings (e.g., PG.13, R, PG), seasonal release (Spring, Summer, Fall), and genre-related features (Main_Drama, Main_Action, etc.). The structure of the tree is based on splits of these features, with terminal nodes (leaves) providing the final predicted class.

Primary Split (Budget):

The first split is based on Log_production_budget_adj. If the adjusted production budget is less than 17.47917, the observation moves left. If it’s greater, it moves right. This indicates budget is the most critical variable determining whether a movie falls into “High’s,” “Medium,” or “Low’s” gross categories. Why it’s significant: Movies with larger budgets typically have higher grossing potential. This makes sense logically and aligns with known patterns in the film industry. Genre and Ratings:

Features like Main_Action, genre_count, and R also appear in splits, particularly when budget isn’t decisive. These secondary splits suggest that specific genres and whether a movie is rated R or otherwise help refine predictions, especially for mid-budget films. Class Distribution at Leaves:

At the terminal nodes (leaves), the probabilities show how strongly one category dominates: Node 4: Strong prediction for “High’s” (90% confidence). Node 14: Mixed category (“Low’s” dominates, but with 44.6% probability and competition from “Medium”).

Variable Importance:

Log_production_budget_adj is the most impactful variable, followed by genre_count, Main_Action, and others. Lower-ranked variables (e.g., Main_Horror, PG.13, R) appear occasionally but are less critical.

Evaluation of the Decision Tree

# Predict on the test data
test_data$predicted_categories <- predict(dt_model_categorical, newdata = test_data, type = "class")

# Confusion Matrix
library(caret)
confusion_matrix <- confusionMatrix(
  data = factor(test_data$predicted_categories, levels = levels(test_data$Log_Worldwide_Gross_Category)),
  reference = test_data$Log_Worldwide_Gross_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     84     2     27
##     Low's      12    74     35
##     Medium     27    38     57
## 
## Overall Statistics
##                                          
##                Accuracy : 0.6039         
##                  95% CI : (0.551, 0.6551)
##     No Information Rate : 0.3455         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.4061         
##                                          
##  Mcnemar's Test P-Value : 0.06388        
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.6829       0.6491        0.4790
## Specificity                 0.8755       0.8058        0.7257
## Pos Pred Value              0.7434       0.6116        0.4672
## Neg Pred Value              0.8395       0.8298        0.7350
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2360       0.2079        0.1601
## Detection Prevalence        0.3174       0.3399        0.3427
## Balanced Accuracy           0.7792       0.7275        0.6024
library(pROC)

# Predict probabilities for test data
predicted_probs <- predict(dt_model_categorical, newdata = test_data, type = "prob")

# Initialize lists for storing ROC and AUC values
roc_list <- list()
auc_list <- list()
categories <- levels(test_data$Log_Worldwide_Gross_Category)

# Generate ROC and AUC for each class
for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Log_Worldwide_Gross_Category == category, 1, 0)
  predicted_probs_binary <- predicted_probs[, category]
  
  # ROC Curve
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC Curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for High's : 0.8352176
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Low's : 0.8079237
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Medium : 0.6504627
library(dplyr)

# Convert predicted_probs to a data frame
predicted_probs <- as.data.frame(predicted_probs)

# Assign proper column names to predicted_probs
colnames(predicted_probs) <- levels(test_data$Log_Worldwide_Gross_Category)

# Add predicted probabilities to test_data
test_data <- cbind(test_data, predicted_probs)

# Ensure unique column names in test_data
names(test_data) <- make.unique(names(test_data))

# Verify no duplicate column names
if (anyDuplicated(names(test_data)) > 0) {
  stop("Duplicate column names still exist in test_data!")
}


# Specify the category for analysis
category <- "High's"

# Sort test_data by predicted probabilities for the chosen category
test_data <- test_data %>%
  arrange(desc(test_data[[category]]))



# Add deciles for the chosen category
test_data$decile <- ntile(test_data[[category]], 10)

# Calculate Lift Table
lift_table <- test_data %>%
  group_by(decile) %>%
  summarize(
    total = n(),
    events = sum(Log_Worldwide_Gross_Category == category),
    cumulative_events = cumsum(events),
    cumulative_percentage = cumulative_events / sum(Log_Worldwide_Gross_Category == category)
  )

# Print Lift Table for verification
print(lift_table)
## # A tibble: 10 × 5
##    decile total events cumulative_events cumulative_percentage
##     <int> <int>  <int>             <int>                 <dbl>
##  1      1    36      6                 6                     1
##  2      2    36      2                 2                     1
##  3      3    36      5                 5                     1
##  4      4    36      6                 6                     1
##  5      5    36      6                 6                     1
##  6      6    36      5                 5                     1
##  7      7    35     13                13                     1
##  8      8    35     16                16                     1
##  9      9    35     30                30                     1
## 10     10    35     34                34                     1
# Plot the Lift Chart
plot(
  lift_table$decile, lift_table$cumulative_percentage,
  type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
  main = paste("Lift Chart for", category)
)
abline(0, 0.1, col = "gray", lty = 2) # Reference random line

Confusion Matrix Overall Accuracy: The model achieved an accuracy of 60.39%. This means that approximately 60% of predictions align with the actual values. While this is above chance level, it leaves room for improvement.

Sensitivity (Recall):

High’s: The model correctly identified 68.29% of the High’s class instances. Low’s: The model correctly identified 64.91% of the Low’s class instances. Medium: The model correctly identified 47.90% of the Medium class instances. Specificity:

High’s: High specificity (87.55%), meaning it correctly rejected instances that were not High’s. Low’s and Medium: Slightly lower specificities for these classes.

Balanced Accuracy:

Balances sensitivity and specificity, with High’s being the best performing class (77.92%) and Medium lagging behind (60.24%). Lift Chart and Table Interpretation The Lift Chart for High’s indicates that the cumulative gain is flat across the deciles. This suggests that the model does not differentiate well between deciles for High’s. Ideally, you would expect a steeper gain curve early on, reflecting the model’s ability to concentrate High’s in the top deciles.

The Lift Table:

It shows the total number of predictions and actual High’s events per decile. The flatness of the cumulative percentage column confirms that the model’s ranking of High’s is not strong.

AUC for Classes

High’s (AUC = 0.835): Strong discrimination between High’s and other classes. Low’s (AUC = 0.808): Good performance but slightly lower than High’s. Medium (AUC = 0.650): Weakest discrimination, consistent with lower sensitivity and specificity.


For optimizing film investment, the best model is XGBoost.

Why? High Accuracy: Ensures precise prediction of “High’s” category, minimizing missed opportunities for high-grossing films. Feature Handling: Handles nonlinear relationships and interactions between features like budget, genre, and season effectively. Scalability: Performs well on large datasets and can be used to predict future investments reliably.

XGBoost is the most suitable for out business goal of maximizing returns on film investments.



IMDb_Rating

# Categorize IMDB_Category into buckets
data <- data %>%
  mutate(IMDB_Category = case_when(
    IMDb_Rating <= 6.0 ~ "Poor",
    IMDb_Rating > 6.0 & IMDb_Rating <= 7.0 ~ "Good",
    IMDb_Rating > 7.0 ~ "Excellent"
  ))

# Convert to factor
data$IMDB_Category <- as.factor(data$IMDB_Category)

# Check if the transformation is correct
table(data$IMDB_Category)
## 
## Excellent      Good      Poor 
##       273       532       379

successfully grouped movies into three distinct categories based on their IMDb ratings:

Excellent (IMDb_Rating > 7.0): Includes 273 movies that are rated highly by audiences. These movies likely have strong appeal and are regarded as high-quality productions. Good (6.0 < IMDb_Rating <= 7.0): This category, with 532 movies, represents the bulk of your dataset. These films are generally well-received but don’t reach the highest acclaim. Poor (IMDb_Rating <= 6.0): Encompassing 379 movies, this group includes those with weaker audience reception, potentially indicating lower production quality or appeal. By converting this information into a factor variable, you’ve prepared your data for categorical analysis, making it suitable for classification models. The counts indicate a reasonable distribution across the three categories, ensuring that each has sufficient representation for modeling.

This classification provides a foundation for identifying patterns and predictors of IMDb rating categories, aligning with your business objective of optimizing film investments based on audience reception.

Spliting the data

# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet)         # For Ridge and LASSO regression
library(randomForest)    # For Random Forest model
library(xgboost)         # For Gradient Boosting model
library(Metrics)         # For evaluation metrics

set.seed(123)  # For reproducibility

# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]

multinomial logistic regression model

# Load the required library
library(nnet)

# Fit the multinomial logistic regression model
multinom_model <- multinom(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
  data = train_data
)
## # weights:  84 (54 variable)
## initial  value 909.650975 
## iter  10 value 809.678022
## iter  20 value 793.107234
## iter  30 value 790.848413
## iter  40 value 789.935179
## iter  50 value 789.440017
## iter  60 value 789.428352
## final  value 789.428307 
## converged
# View model summary
summary(multinom_model)
## Call:
## multinom(formula = IMDB_Category ~ Log_production_budget_adj + 
##     PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
##     data = train_data)
## 
## Coefficients:
##      (Intercept) Log_production_budget_adj      PG.13          R         PG
## Good   24.927043               -0.02282168 -10.646506 -10.806320 -10.397427
## Poor    6.274926                0.13951657   7.413371   6.879478   7.810383
##              G between_90_to_135 Greater_than_135    Spring     Summer
## Good -10.63351       -0.01849204       -0.9017206 0.4697993 0.55413689
## Poor   5.98092       -1.12727045       -2.6300366 0.4255005 0.05195477
##            Fall genre_count Main_Action Main_Adventure Main_Animation
## Good -0.1520420  0.03402828   -12.92811      -13.36262      -14.15792
## Poor -0.5515793 -0.05509641   -13.84344      -14.08018      -16.44833
##      Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Good   -13.27961  -13.07354        -14.55232  -14.04569    20.88705
## Poor   -13.97709  -14.80415        -15.16718  -15.08242    18.84732
##      Main_Fantasy Main_Horror Main_Mystery Main_Romance Main_Science_Fiction
## Good    -13.07642   -10.77096     27.74199    -12.98846            -13.27668
## Poor    -12.53571   -11.16733     26.58487    -14.04582            -13.73213
##      Main_Thriller Main_History
## Good     -13.09546    -12.89506
## Poor     -14.05920    -14.59161
## 
## Std. Errors:
##      (Intercept) Log_production_budget_adj     PG.13         R        PG
## Good    1.201371                0.08792285 0.3930964 0.3621882 0.4558769
## Poor    1.387132                0.10162394 0.4563415 0.4258400 0.5151556
##              G between_90_to_135 Greater_than_135    Spring    Summer      Fall
## Good 0.9054959         0.4149134        0.5604887 0.2948914 0.2851564 0.2573759
## Poor 1.1830739         0.4150361        0.6566664 0.3091847 0.3130964 0.2854068
##      genre_count Main_Action Main_Adventure Main_Animation Main_Comedy
## Good   0.1049707   0.5091539      0.6382766      0.7244662   0.4847574
## Poor   0.1166624   0.5196586      0.6472403      0.8687498   0.4897048
##      Main_Crime Main_Documentary Main_Drama Main_Family Main_Fantasy
## Good  0.5644480         1.028526  0.4607067   0.5883808     1.174404
## Poor  0.6657735         1.101435  0.4729978   0.5883808     1.085745
##      Main_Horror Main_Mystery Main_Romance Main_Science_Fiction Main_Thriller
## Good    1.044573    0.5350485    0.6929191            0.8112293      0.555842
## Poor    1.046672    0.5350485    0.7277143            0.8217027      0.582732
##      Main_History
## Good    0.9587806
## Poor    1.3065222
## 
## Residual Deviance: 1578.857 
## AIC: 1686.857

The AIC (Akaike Information Criterion) for the model is 1686.857, which is a metric to evaluate model quality. A lower AIC generally indicates a better model, but this depends on the complexity of the model and comparison with other models. The model converged successfully, as indicated by the declining deviance and the message.

Coefficients:

The coefficients represent the log odds of being in the “Good” or “Poor” categories compared to the reference category (likely “Excellent,” although this is implied and not explicitly stated in the output).

Statistical Significance:

The standard errors (SE) allow us to assess the significance of coefficients:

If the absolute value of a coefficient divided by its SE is greater than ~1.96, the coefficient is statistically significant at a 95% confidence level.

Variable Insights:

Log_production_budget_adj: Slightly negative for “Good” but positive for “Poor.” This suggests that films with higher budgets may have more extreme outcomes (either “Excellent” or “Poor”) rather than falling in the “Good” category.

Seasonality:

Summer has a positive association for both “Good” and “Poor,” indicating it might not strongly correlate with “Excellent” films. Fall has a negative association for both “Good” and “Poor,” implying it may favor “Excellent” films.

Genres:

Main_Family has a strong positive association with “Good” compared to “Excellent,” while other genres (like Main_Comedy, Main_Crime, Main_Action) have large negative coefficients, indicating a lower likelihood of being “Good” relative to “Excellent.” Main_Mystery shows a significant positive association with “Good,” while Main_Drama and Main_Romance negatively correlate with “Poor.”

Model Fit:

Residual Deviance: At 1578.857, this value reflects how well the model fits the data. Lower deviance indicates better fit, but the absolute value alone is hard to interpret without comparing it to the null model. AIC: A value of 1686.857 suggests that while the model is moderately complex, there is room for improvement. Consider comparing this AIC with other models.

Key Takeaway:

The model suggests that film attributes like budget, genres, and seasonality are significant predictors of IMDB ratings. However, some coefficients are counterintuitive or weak, and further refinement (e.g., interaction terms, feature selection) might improve predictions.

The high deviance and AIC indicate that there may be other unobserved factors influencing film ratings that are not captured by this model.

Evaluating multinomial logistic regression model

library(pROC)
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")

# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
  data = factor(test_data$predicted_categories, levels = levels(test_data$IMDB_Category)),
  reference = factor(test_data$IMDB_Category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        26   31    9
##   Good             40   94   63
##   Poor              8   38   47
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4691          
##                  95% CI : (0.4163, 0.5224)
##     No Information Rate : 0.4579          
##     P-Value [Acc > NIR] : 0.35437         
##                                           
##                   Kappa : 0.1448          
##                                           
##  Mcnemar's Test P-Value : 0.06051         
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                   0.35135      0.5767      0.3950
## Specificity                   0.85816      0.4663      0.8059
## Pos Pred Value                0.39394      0.4772      0.5054
## Neg Pred Value                0.83448      0.5660      0.7262
## Prevalence                    0.20787      0.4579      0.3343
## Detection Rate                0.07303      0.2640      0.1320
## Detection Prevalence          0.18539      0.5534      0.2612
## Balanced Accuracy             0.60475      0.5215      0.6004
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$IMDB_Category)

for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
  predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
  
  # ROC Curve
  roc_obj_test <- roc(true_binary, predicted_probs)
  roc_list_test[[category]] <- roc_obj_test
  auc_list_test[[category]] <- auc(roc_obj_test)
  
  # Plot ROC Curve for this class
  plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent on Test Data: 0.6775925
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Good on Test Data: 0.5396548
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Poor on Test Data: 0.6835798
# Lift Chart for each category
for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
  
  # Calculate Lift
  lift_table_test <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(IMDB_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
    )
  
  # Plot Lift Chart for the current category
  plot(
    lift_table_test$decile, lift_table_test$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category, "on Test Data")
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}

Accuracy & Kappa:

Accuracy: 46.91%—indicating moderate performance. Kappa: 0.1448—suggests weak agreement between predictions and actual labels.

Class Performance:

Sensitivity: Best for “Good” (57.67%), weaker for “Excellent” (35.14%) and “Poor” (39.50%). AUC: “Excellent” (0.678) and “Poor” (0.684) show moderate discrimination, while “Good” (0.540) struggles.

Lift Charts:

Flat lift curves for all categories indicate limited predictive power over random assignment.

The model’s performance is marginal and may not effectively predict IMDb categories to optimize film investments. A more robust model, such as Random Forests or XGBoost, could better capture complex patterns and improve prediction accuracy.

Random forest

# Load the required library
library(randomForest)

# Train Random Forest Model for Classification
rf_model <- randomForest(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  ntree = 500,        # Number of trees
  mtry = 5,           # Number of predictors randomly selected at each split
  importance = TRUE,  # Calculate variable importance
  proximity = TRUE    # Enable proximity matrix for better insights
)

# View the model summary
print(rf_model)
## 
## Call:
##  randomForest(formula = IMDB_Category ~ Log_production_budget_adj +      PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +      Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +      Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +      Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +      Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,      data = train_data, ntree = 500, mtry = 5, importance = TRUE,      proximity = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 51.93%
## Confusion matrix:
##           Excellent Good Poor class.error
## Excellent        74  102   23   0.6281407
## Good             47  242   80   0.3441734
## Poor             21  157   82   0.6846154
# Feature Importance
importance(rf_model)
##                            Excellent         Good       Poor
## Log_production_budget_adj  5.5562595 -0.617390650  4.4737902
## PG.13                      6.4343914  0.511678030 -0.9170209
## R                          3.5973168  0.007512022  4.4923900
## PG                         0.1780894 -1.327251776  0.2481510
## G                          2.5560984 -0.144992167 -1.5442459
## between_90_to_135          8.0395917  6.164723251  8.1643830
## Greater_than_135          12.5703393  0.710064025  0.7932221
## Spring                     8.6102187 -1.160330481  0.3737738
## Summer                     3.5266447  5.061002726 -5.5845587
## Fall                       6.9514127 -2.522729238  4.0482813
## genre_count                7.6232133 -0.849847956  9.5990737
## Main_Action                7.3320425 -1.252873721 -2.5025475
## Main_Adventure             1.1714017 -4.788950757  0.9223408
## Main_Animation             4.1085949  5.032231578 -6.8322188
## Main_Comedy                8.0625707 -2.297877811  5.0272365
## Main_Crime                 1.2840318  0.028878052  5.2748762
## Main_Documentary          -0.3980004 -1.155840678 -0.4537382
## Main_Drama                27.3651902  3.002580171  9.2495006
## Main_Family                3.7864771  5.528543198 -2.3869721
## Main_Fantasy               1.2341850  2.041381246  6.8349901
## Main_Horror               12.8848234 -5.974479746 12.9673035
## Main_Mystery               3.6792786 -1.003593665 -0.6639728
## Main_History               1.0892667 -0.872438390 -1.5494299
## Main_Romance              -2.8136036 -1.958098679 -1.9709389
## Main_Science_Fiction      -1.0251046 -2.465740800 -1.4690986
## Main_Thriller              7.6044038 -1.387403030  0.7149374
##                           MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj           4.16423073        71.437829
## PG.13                               2.52675789         7.323162
## R                                   4.24099406         8.744664
## PG                                 -0.66968922         5.234426
## G                                   0.29227849         1.389615
## between_90_to_135                  12.75845077        11.009628
## Greater_than_135                   10.03884538         6.039729
## Spring                              3.63649875         8.993754
## Summer                              2.46065818         8.577208
## Fall                                4.76076206         9.674141
## genre_count                         8.34598082        29.336847
## Main_Action                        -0.02015565         6.788580
## Main_Adventure                     -2.47071082         4.326703
## Main_Animation                      3.28286937         3.226865
## Main_Comedy                         5.14133968         8.135388
## Main_Crime                          2.63592809         5.119881
## Main_Documentary                   -1.16443631         1.928174
## Main_Drama                         24.12249479        16.174920
## Main_Family                         4.06843841         1.700587
## Main_Fantasy                        6.69061140         4.128141
## Main_Horror                         9.54124123         6.732910
## Main_Mystery                       -0.17380793         1.614617
## Main_History                       -0.93868309         1.711788
## Main_Romance                       -3.61497776         2.977116
## Main_Science_Fiction               -3.15656377         3.258371
## Main_Thriller                       2.87512758         5.750493
varImpPlot(rf_model) # Plot variable importance

Model Performance:

OOB Error Rate: 51.93%, suggesting moderate classification performance.

Confusion Matrix:

“Good” had the lowest class error (34.42%), indicating it is predicted relatively better. “Excellent” and “Poor” had higher class errors (62.81% and 68.46%, respectively).

Feature Importance:

The most important predictors based on Mean Decrease Accuracy and Mean Decrease Gini are:

Main_Drama: Highest importance, strongly contributing to the model’s accuracy and split quality. Log_production_budget_adj and genre_count: Also key predictors, indicating their relevance in categorizing IMDb ratings.

Between_90_to_135: Demonstrated high importance, suggesting runtime as a significant factor. Less influential features include Main_Science_Fiction and Main_Romance.

Variable Influence:

The importance plot emphasizes “Main_Drama” and “Log_production_budget_adj” as pivotal features, highlighting their correlation with IMDb rating categories.

Seasonal variables (e.g., Spring, Summer, Fall) and genre-specific variables (e.g., Main_Comedy, Main_Horror) show varied but moderate impact.

The Random Forest model is better suited for identifying patterns across categories compared to Multinomial Logistic Regression. Its performance indicates potential for predicting IMDb ratings, particularly when focused on key drivers like production budget, genre, and drama. Further optimization, like tuning hyperparameters, may improve predictive accuracy.

Evaluating Random Forest model

# Predict on the test data
test_data$rf_predicted_categories <- predict(rf_model, newdata = test_data)

# Confusion Matrix
rf_confusion_matrix_test <- confusionMatrix(
  data = factor(test_data$rf_predicted_categories, levels = levels(test_data$IMDB_Category)),
  reference = factor(test_data$IMDB_Category)
)
print(rf_confusion_matrix_test)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        25   28   10
##   Good             44  104   64
##   Poor              5   31   45
## 
## Overall Statistics
##                                          
##                Accuracy : 0.4888         
##                  95% CI : (0.4357, 0.542)
##     No Information Rate : 0.4579         
##     P-Value [Acc > NIR] : 0.1320677      
##                                          
##                   Kappa : 0.168          
##                                          
##  Mcnemar's Test P-Value : 0.0008202      
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                   0.33784      0.6380      0.3782
## Specificity                   0.86525      0.4404      0.8481
## Pos Pred Value                0.39683      0.4906      0.5556
## Neg Pred Value                0.83276      0.5903      0.7309
## Prevalence                    0.20787      0.4579      0.3343
## Detection Rate                0.07022      0.2921      0.1264
## Detection Prevalence          0.17697      0.5955      0.2275
## Balanced Accuracy             0.60154      0.5392      0.6131
# ROC Curve and AUC for each class
rf_roc_list_test <- list()
rf_auc_list_test <- list()

categories <- levels(test_data$IMDB_Category)
print(categories)
## [1] "Excellent" "Good"      "Poor"
predicted_probs <- predict(rf_model, newdata = test_data, type = "prob")
print(colnames(predicted_probs))
## [1] "Excellent" "Good"      "Poor"
categories <- colnames(predicted_probs)


for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
  
  # Extract predicted probabilities for the current category
  rf_predicted_probs <- predicted_probs[, category]  # Use the inspected column name
  
  # ROC Curve
  rf_roc_obj_test <- roc(true_binary, rf_predicted_probs)
  rf_roc_list_test[[category]] <- rf_roc_obj_test
  rf_auc_list_test[[category]] <- auc(rf_roc_obj_test)
  
  # Plot ROC Curve for this class
  plot(rf_roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "red")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, "on Test Data:", rf_auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent on Test Data: 0.7138442
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Good on Test Data: 0.5467275
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Poor on Test Data: 0.6822324
# Lift Chart for each category
for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predict(rf_model, newdata = test_data, type = "prob")[, category]))

  # Add deciles for the chosen category
  test_data$rf_decile <- ntile(predict(rf_model, newdata = test_data, type = "prob")[, category], 10)
  
  # Calculate Lift
  rf_lift_table_test <- test_data %>%
    group_by(rf_decile) %>%
    summarize(
      total = n(),
      events = sum(IMDB_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
    )
  
  # Plot Lift Chart for the current category
  plot(
    rf_lift_table_test$rf_decile, rf_lift_table_test$cumulative_percentage,
    type = "o", col = "red", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category, "on Test Data")
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}

Confusion Matrix Performance:

Overall accuracy of the Random Forest model on test data is 48.88%, with a Kappa statistic of 0.168, indicating a slight improvement over random classification but with room for better performance.

Class-specific sensitivity and specificity:

Excellent: Sensitivity = 33.78%, Specificity = 86.53%, PPV = 39.68%. Good: Sensitivity = 63.80%, Specificity = 44.04%, PPV = 49.06%. Poor: Sensitivity = 37.82%, Specificity = 84.81%, PPV = 55.56%.

Area Under the Curve (AUC):

Excellent: AUC = 0.714, indicating reasonable model performance for this category. Good: AUC = 0.547, suggesting limited model discrimination. Poor: AUC = 0.682, showing moderate classification ability.

Variable Importance:

The most important predictors influencing IMDb categories are: Main_Drama (highest Mean Decrease in Accuracy and Gini). Log_production_budget_adj. Between_90_to_135 and Greater_than_135 for movie runtime. Genre Count and specific genres like Main_Horror and Main_Comedy.

Insights from ROC Curves:

For Excellent, the ROC curve and AUC highlight better classification ability than other categories. Good shows almost random classification, with AUC close to 0.5. Poor is moderately well-classified, but improvements can still be made.

The Random Forest model offers slightly better performance for predicting Excellent-rated films than other categories, with decent AUC (0.714). However, the overall model accuracy and class-specific performance metrics indicate the need for further optimization or exploration of alternative models. Feature engineering or ensemble methods might enhance predictive power, especially for the challenging categories like “Good.”

XGBoost

# Load necessary libraries
library(xgboost)

# Prepare data for XGBoost
x_train <- model.matrix(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data
)[, -1] # Remove intercept column

# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$IMDB_Category) - 1

# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
  data = as.matrix(x_train),
  label = y_train,
  objective = "multi:softprob",  # Multi-class classification
  num_class = length(levels(train_data$IMDB_Category)), # Number of classes
  nrounds = 100,                 # Number of boosting rounds
  max_depth = 6,                 # Tree depth
  eta = 0.1,                     # Learning rate
  colsample_bytree = 0.8,        # Subsample ratio of columns
  verbose = 1                    # Print training progress
)
## [1]  train-mlogloss:1.074479 
## [2]  train-mlogloss:1.052844 
## [3]  train-mlogloss:1.027775 
## [4]  train-mlogloss:1.010434 
## [5]  train-mlogloss:0.989777 
## [6]  train-mlogloss:0.972292 
## [7]  train-mlogloss:0.956994 
## [8]  train-mlogloss:0.941578 
## [9]  train-mlogloss:0.926042 
## [10] train-mlogloss:0.913551 
## [11] train-mlogloss:0.902381 
## [12] train-mlogloss:0.888798 
## [13] train-mlogloss:0.876363 
## [14] train-mlogloss:0.865105 
## [15] train-mlogloss:0.853369 
## [16] train-mlogloss:0.845262 
## [17] train-mlogloss:0.837227 
## [18] train-mlogloss:0.829368 
## [19] train-mlogloss:0.820469 
## [20] train-mlogloss:0.813455 
## [21] train-mlogloss:0.806492 
## [22] train-mlogloss:0.798595 
## [23] train-mlogloss:0.792434 
## [24] train-mlogloss:0.785758 
## [25] train-mlogloss:0.777530 
## [26] train-mlogloss:0.772429 
## [27] train-mlogloss:0.767649 
## [28] train-mlogloss:0.762378 
## [29] train-mlogloss:0.755696 
## [30] train-mlogloss:0.751969 
## [31] train-mlogloss:0.747556 
## [32] train-mlogloss:0.744093 
## [33] train-mlogloss:0.739338 
## [34] train-mlogloss:0.732562 
## [35] train-mlogloss:0.728322 
## [36] train-mlogloss:0.723797 
## [37] train-mlogloss:0.720946 
## [38] train-mlogloss:0.717706 
## [39] train-mlogloss:0.714799 
## [40] train-mlogloss:0.711286 
## [41] train-mlogloss:0.707176 
## [42] train-mlogloss:0.705031 
## [43] train-mlogloss:0.701661 
## [44] train-mlogloss:0.698750 
## [45] train-mlogloss:0.694957 
## [46] train-mlogloss:0.691874 
## [47] train-mlogloss:0.689633 
## [48] train-mlogloss:0.687683 
## [49] train-mlogloss:0.683668 
## [50] train-mlogloss:0.681192 
## [51] train-mlogloss:0.679446 
## [52] train-mlogloss:0.677509 
## [53] train-mlogloss:0.675144 
## [54] train-mlogloss:0.673366 
## [55] train-mlogloss:0.670637 
## [56] train-mlogloss:0.668569 
## [57] train-mlogloss:0.665969 
## [58] train-mlogloss:0.663650 
## [59] train-mlogloss:0.660852 
## [60] train-mlogloss:0.658927 
## [61] train-mlogloss:0.655505 
## [62] train-mlogloss:0.652286 
## [63] train-mlogloss:0.650891 
## [64] train-mlogloss:0.648573 
## [65] train-mlogloss:0.644471 
## [66] train-mlogloss:0.641360 
## [67] train-mlogloss:0.640229 
## [68] train-mlogloss:0.638847 
## [69] train-mlogloss:0.636355 
## [70] train-mlogloss:0.634092 
## [71] train-mlogloss:0.631408 
## [72] train-mlogloss:0.629970 
## [73] train-mlogloss:0.627481 
## [74] train-mlogloss:0.624073 
## [75] train-mlogloss:0.621771 
## [76] train-mlogloss:0.619510 
## [77] train-mlogloss:0.614699 
## [78] train-mlogloss:0.612588 
## [79] train-mlogloss:0.610307 
## [80] train-mlogloss:0.606432 
## [81] train-mlogloss:0.603806 
## [82] train-mlogloss:0.601372 
## [83] train-mlogloss:0.598331 
## [84] train-mlogloss:0.596270 
## [85] train-mlogloss:0.594136 
## [86] train-mlogloss:0.592415 
## [87] train-mlogloss:0.590526 
## [88] train-mlogloss:0.588512 
## [89] train-mlogloss:0.586801 
## [90] train-mlogloss:0.585678 
## [91] train-mlogloss:0.584121 
## [92] train-mlogloss:0.582822 
## [93] train-mlogloss:0.581860 
## [94] train-mlogloss:0.577679 
## [95] train-mlogloss:0.576566 
## [96] train-mlogloss:0.574965 
## [97] train-mlogloss:0.573818 
## [98] train-mlogloss:0.572281 
## [99] train-mlogloss:0.568154 
## [100]    train-mlogloss:0.567374
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
##                       Feature         Gain       Cover   Frequency
##  1: Log_production_budget_adj 0.4233138823 0.466293781 0.415887850
##  2:               genre_count 0.1072688206 0.060595627 0.121183801
##  3:                Main_Drama 0.0767843067 0.036568122 0.023520249
##  4:                      Fall 0.0413833587 0.021744791 0.045638629
##  5:         between_90_to_135 0.0378092282 0.033511624 0.035669782
##  6:                         R 0.0326238949 0.017379619 0.029127726
##  7:               Main_Horror 0.0299443627 0.043243943 0.022897196
##  8:                    Spring 0.0268838237 0.019189010 0.032866044
##  9:                     PG.13 0.0266688630 0.010705740 0.039096573
## 10:                    Summer 0.0255862226 0.033776733 0.044704050
## 11:               Main_Comedy 0.0238457573 0.017231783 0.025545171
## 12:          Greater_than_135 0.0207961257 0.027010405 0.020404984
## 13:             Main_Thriller 0.0179591402 0.023251360 0.018068536
## 14:                        PG 0.0174209573 0.010606143 0.020560748
## 15:               Main_Action 0.0155807074 0.007177278 0.018380062
## 16:                Main_Crime 0.0152833163 0.026108310 0.014485981
## 17:              Main_Fantasy 0.0123369093 0.024487168 0.008878505
## 18:            Main_Animation 0.0105049052 0.023966499 0.012149533
## 19:            Main_Adventure 0.0101526928 0.007793446 0.012616822
## 20:               Main_Family 0.0062642320 0.026925637 0.007943925
## 21:              Main_Mystery 0.0058649216 0.029570608 0.009968847
## 22:      Main_Science_Fiction 0.0043164510 0.009034000 0.005919003
## 23:                         G 0.0038079085 0.003121660 0.003271028
## 24:          Main_Documentary 0.0034662981 0.005663165 0.003894081
## 25:              Main_History 0.0031460959 0.007969614 0.004517134
## 26:              Main_Romance 0.0009868179 0.007073935 0.002803738
##                       Feature         Gain       Cover   Frequency
xgb.plot.importance(importance)

Feature Importance:

The most important features based on the gain are:

Log_production_budget_adj: Contributes significantly to predictions with the highest gain (42.33%). Genre count: Second most important, indicating the diversity of genres in a film as a strong predictor. Main_Drama, Fall, and between_90_to_135 (runtime) are also notable contributors. Features like Main_Romance and Main_History exhibit negligible influence, suggesting limited predictive power for IMDb categories.

Gain, Cover, and Frequency Insights:

Gain measures the contribution of each feature to improving accuracy. Log_production_budget_adj dominates, followed by genre_count. Cover indicates how frequently a feature appears in the model. While Log_production_budget_adj has high cover, features like Main_Horror and between_90_to_135 also show relatively frequent use in trees. Frequency reflects how often a feature is selected for splitting. Features like Log_production_budget_adj and genre_count are split on frequently.

Performance Indicators:

Feature importance visualization indicates that Log_production_budget_adj and genre_count significantly influence predictions, aligning with domain expectations (e.g., budget and genre diversity affecting IMDb scores).

Lift and Predictive Analysis:

The dominance of features related to budget and genre diversity suggests that these metrics are critical for predicting IMDb performance categories (Excellent, Good, Poor). Runtime and seasonal release windows (e.g., Fall, Spring, Summer) also provide meaningful predictive insights.

The XGBoost model strongly prioritizes production budget, genre diversity, and runtime as the most critical predictors for IMDb rating categories. These insights align with intuitive industry expectations, emphasizing the importance of resource allocation and content variety in determining movie success.

XGBoost Model Evaluation

# Prepare test data matrix
x_test <- model.matrix(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = test_data
)[, -1]

# Encode test labels as numeric (0-based)
y_test <- as.numeric(test_data$IMDB_Category) - 1


# Reshape predicted probabilities into a matrix
pred_matrix <- matrix(pred_probs, nrow = nrow(x_test), byrow = TRUE)

# Get predicted classes (1-based indexing)
pred_classes <- max.col(pred_matrix) - 1  # Convert from 1-based to 0-based indexing for compatibility

# Convert to factor using the original levels of the dependent variable
test_data$predicted_categories <- factor(pred_classes, labels = levels(test_data$IMDB_Category))

cat("Rows in predicted categories:", length(test_data$predicted_categories), "\n")
## Rows in predicted categories: 356
cat("Rows in test data:", nrow(test_data), "\n")
## Rows in test data: 356
library(caret)
confusion_matrix <- confusionMatrix(
  data = test_data$predicted_categories,
  reference = test_data$IMDB_Category
)
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        33   62   29
##   Good             26   51   27
##   Poor             15   50   63
## 
## Overall Statistics
##                                          
##                Accuracy : 0.4129         
##                  95% CI : (0.3613, 0.466)
##     No Information Rate : 0.4579         
##     P-Value [Acc > NIR] : 0.9607         
##                                          
##                   Kappa : 0.1285         
##                                          
##  Mcnemar's Test P-Value : 9.302e-06      
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                    0.4459      0.3129      0.5294
## Specificity                    0.6773      0.7254      0.7257
## Pos Pred Value                 0.2661      0.4904      0.4922
## Neg Pred Value                 0.8233      0.5556      0.7544
## Prevalence                     0.2079      0.4579      0.3343
## Detection Rate                 0.0927      0.1433      0.1770
## Detection Prevalence           0.3483      0.2921      0.3596
## Balanced Accuracy              0.5616      0.5191      0.6276
# ROC Curve and AUC for each class
library(pROC)

# Initialize lists to store results
roc_list <- list()
auc_list <- list()

# Iterate through categories
for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$IMDB_Category == category, 1, 0)
  
  # Extract predicted probabilities for the current category
  predicted_probs <- pred_matrix[, which(categories == category)]
  
  # ROC Curve
  roc_obj <- roc(true_binary, predicted_probs)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC Curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.5472733
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Good : 0.5122223
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Poor : 0.6884551
# Lift Chart for each category
library(dplyr)

for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(pred_matrix[, which(categories == category)]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(pred_matrix[, which(categories == category)], 10)
  
  # Calculate Lift
  lift_table <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(IMDB_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
    )
  
  # Plot Lift Chart
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference line
}

The accuracy of the XGBoost model is 41.29%, which is relatively low and marginally above random chance. The Kappa statistic (0.1285) reflects slight agreement beyond chance, suggesting limited effectiveness for this classification task. No Information Rate (NIR) is 45.79%, indicating the proportion of the majority class in the dataset. The model’s performance is below NIR, highlighting its underperformance.

Class-Specific Metrics:

Excellent:

Sensitivity (Recall): 44.59% of “Excellent” films were correctly classified. Specificity: 67.73%, indicating moderate success in rejecting non-“Excellent” categories. AUC: 0.547, suggesting minimal ability to differentiate “Excellent” from others.

Good: Sensitivity: 31.29%, indicating significant misclassification. AUC: 0.512, close to random guessing.

Poor: Sensitivity: 52.94%, indicating better-than-average identification. AUC: 0.688, showing moderate discrimination power for “Poor” films.

Confusion Matrix Insights:

High confusion among classes: Many “Good” films were misclassified as “Excellent” or “Poor.” “Poor” has the highest detection rate (52.94%) but is still far from ideal.

ROC Curves: “Poor” class has the highest AUC (0.688), showing reasonable discrimination. Other classes have AUCs near 0.5, reflecting limited predictive capacity.

The XGBoost model struggles with this multi-class classification task, particularly for distinguishing “Excellent” and “Good” IMDb categories. While it performs slightly better for “Poor” films

Polynomial Logistic Regression

# Fit a polynomial logistic regression model
library(nnet)

polynomial_logistic_model <- multinom(
  IMDB_Category ~ poly(Log_production_budget_adj, degree = 2) +
    PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
    Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
    Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
    Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + 
    Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights:  87 (56 variable)
## initial  value 909.650975 
## iter  10 value 813.084800
## iter  20 value 792.425301
## iter  30 value 789.841615
## iter  40 value 788.661010
## iter  50 value 788.268289
## iter  60 value 788.130398
## iter  70 value 788.128120
## final  value 788.128057 
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = IMDB_Category ~ poly(Log_production_budget_adj, 
##     degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_History + Main_History + Main_Romance + Main_Science_Fiction + 
##     Main_Thriller, data = train_data, maxit = 1000)
## 
## Coefficients:
##      (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Good   23.988125                                    0.1377281
## Poor    8.765919                                    6.0120151
##      poly(Log_production_budget_adj, degree = 2)2     PG.13          R
## Good                                    3.6679933 -11.08989 -11.226961
## Poor                                   -0.0393772   6.23762   5.711377
##              PG          G between_90_to_135 Greater_than_135    Spring
## Good -10.843189 -11.069835         -0.015003       -0.9716645 0.4621885
## Poor   6.634967   4.813758         -1.142498       -2.6539811 0.4246470
##          Summer       Fall genre_count Main_Action Main_Adventure
## Good 0.53561307 -0.1540647  0.01754914   -11.92844      -12.43651
## Poor 0.06041035 -0.5552578 -0.06005683   -12.78934      -13.03532
##      Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Good      -13.20400   -12.21288  -12.01493        -13.50688  -12.99334
## Poor      -15.42168   -12.91627  -13.73824        -14.09629  -14.02027
##      Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Good    14.88201    -12.12609   -9.705199     18.30772    -11.76825
## Poor    12.82344    -11.48261  -10.090125     17.13281    -13.48952
##      Main_Romance Main_Science_Fiction Main_Thriller
## Good    -11.95606            -12.29441     -12.01625
## Poor    -13.01636            -12.68923     -12.99570
## 
## Std. Errors:
##      (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Good   0.5378852                                     3.440871
## Poor   0.5775088                                     3.969484
##      poly(Log_production_budget_adj, degree = 2)2     PG.13         R        PG
## Good                                     2.963938 0.3063993 0.3151366 0.3686127
## Poor                                     3.620412 0.3615220 0.3733576 0.4143836
##              G between_90_to_135 Greater_than_135    Spring    Summer      Fall
## Good 0.8382865         0.4163319        0.5655286 0.2951358 0.2860115 0.2576835
## Poor 1.1039891         0.4169914        0.6593857 0.3094216 0.3138867 0.2854426
##      genre_count Main_Action Main_Adventure Main_Animation Main_Comedy
## Good   0.1058137   0.4936114      0.6297194      0.7144038   0.4783541
## Poor   0.1173153   0.5023833      0.6366743      0.8581629   0.4830573
##      Main_Crime Main_Documentary Main_Drama Main_Family Main_Fantasy
## Good  0.5566858         1.034188  0.4595046   0.5886378     1.169425
## Poor  0.6589084         1.112223  0.4721036   0.5886378     1.080924
##      Main_Horror Main_Mystery Main_History Main_Romance Main_Science_Fiction
## Good    1.045242    0.5341723    0.9645703    0.6941953            0.8039164
## Poor    1.047749    0.5341723    1.3070485    0.7268909            0.8143117
##      Main_Thriller
## Good     0.5518813
## Poor     0.5792322
## 
## Residual Deviance: 1576.256 
## AIC: 1688.256

The polynomial logistic regression model incorporates a quadratic transformation (poly) for the variable Log_production_budget_adj. This allows the model to capture potential nonlinear relationships between production budget and IMDb categories. The model successfully converged, achieving a final residual deviance of 1576.256 and an AIC (Akaike Information Criterion) of 1688.256. These metrics suggest slightly improved fit compared to a simpler multinomial logistic regression model without polynomial terms.

Coefficients Analysis:

Intercepts: Represent baseline probabilities for each IMDb category when all predictors are zero.

Log_production_budget_adj: For the “Good” category, the quadratic term is significant, with positive coefficients, indicating a nonlinear relationship with the budget. For the “Poor” category, the linear term is stronger, suggesting that a higher budget correlates negatively with being classified as “Poor.”

Genre & Seasonal Effects: Main_Family and Main_Mystery genres show high positive coefficients for the “Good” category, indicating these genres might increase the likelihood of higher IMDb ratings. Seasonal effects (Spring, Summer, etc.) appear relatively weak overall, but their inclusion contributes slightly to the variance explained.

Rating Classifications (PG, R, PG-13): A strong negative relationship exists between “Good” films and the PG-13 rating, while the R rating has a significant positive relationship with “Poor.”

Fit and Complexity:

The addition of the polynomial term has slightly reduced residual deviance compared to the simpler logistic regression model. However, the improvement is marginal, suggesting that the added complexity might not provide substantial benefit in predictive accuracy. Coefficients for some genres and ratings (e.g., Main_History, G, PG) have relatively high standard errors, indicating less certainty in their contribution to the model.

Observations:

Residual Deviance (1576.256): Reflects the remaining unexplained variance. A lower deviance compared to the simpler multinomial model indicates better fit. AIC (1688.256): While AIC decreased slightly compared to the base model, the difference is small, and overfitting might be a concern with added polynomial terms. Quadratic Relationship in Budget: The model suggests that both very low and very high production budgets might associate with different rating categories, though further testing (e.g., visualization) would validate this interpretation.

Evaluating Polynomial Logistic Regression

# Predict class probabilities on test data
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")

# Predict classes on test data
predicted_classes <- predict(polynomial_logistic_model, newdata = test_data, type = "class")

# Convert predicted classes to a factor
predicted_classes <- factor(predicted_classes, levels = levels(test_data$IMDB_Category))

# Ensure true labels are factors
true_classes <- factor(test_data$IMDB_Category, levels = levels(test_data$IMDB_Category))

library(caret)

# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        25   33    9
##   Good             41   93   66
##   Poor              8   37   44
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4551          
##                  95% CI : (0.4025, 0.5084)
##     No Information Rate : 0.4579          
##     P-Value [Acc > NIR] : 0.56280         
##                                           
##                   Kappa : 0.1212          
##                                           
##  Mcnemar's Test P-Value : 0.02813         
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                   0.33784      0.5706      0.3697
## Specificity                   0.85106      0.4456      0.8101
## Pos Pred Value                0.37313      0.4650      0.4944
## Neg Pred Value                0.83045      0.5513      0.7191
## Prevalence                    0.20787      0.4579      0.3343
## Detection Rate                0.07022      0.2612      0.1236
## Detection Prevalence          0.18820      0.5618      0.2500
## Balanced Accuracy             0.59445      0.5081      0.5899
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# One-vs-All ROC and AUC
for (category in levels(true_classes)) {
  # Create binary labels for the current category
  true_binary <- ifelse(true_classes == category, 1, 0)
  
  # Get predicted probabilities for the current category
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if true_binary has fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.6811865
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Good : 0.5362853
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Poor : 0.6851044
library(dplyr)

# Specify the category for Lift Chart (e.g., "Good")
category <- "Good"

# Ensure predicted probabilities are in a data frame
predicted_probs_df <- as.data.frame(predicted_probabilities)

# Combine test data with predicted probabilities
test_data_lift <- test_data %>%
  mutate(predicted_prob = predicted_probs_df[[category]])

# Sort by predicted probabilities
test_data_lift <- test_data_lift %>%
  arrange(desc(predicted_prob))

# Add deciles
test_data_lift$decile <- ntile(test_data_lift$predicted_prob, 10)

# Calculate Lift
lift_table <- test_data_lift %>%
  group_by(decile) %>%
  summarize(
    total = n(),
    events = sum(IMDB_Category == category), # Adjust for binary or multi-class context
    cumulative_events = cumsum(events),
    cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
  )



# Plot Lift Chart
plot(
  lift_table$decile, lift_table$cumulative_percentage,
  type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
  main = paste("Lift Chart for", category)
)
abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line

Confusion Matrix Overview:

Accuracy: 45.51% (indicating that less than half of the test data was correctly classified). Kappa: 0.1212 (a low value, suggesting weak agreement between predicted and actual classes beyond chance). McNemar’s Test P-Value: 0.02813 (significant, implying potential imbalance or systematic differences in misclassification).

Class-Level Performance:

Excellent:

Sensitivity: 33.78% (low, many “Excellent” cases are misclassified). Specificity: 85.11% (good, few non-“Excellent” cases are classified as “Excellent”). AUC: 0.6812 (moderate discriminatory power for “Excellent”).

Good:

Sensitivity: 57.06% (moderate, but nearly half of “Good” cases are misclassified). Specificity: 44.56% (low, many non-“Good” cases are misclassified as “Good”). AUC: 0.5363 (poor discriminatory power for “Good”).

Poor:

Sensitivity: 36.97% (low, many “Poor” cases are misclassified). Specificity: 81.01% (good, relatively few non-“Poor” cases are classified as “Poor”). AUC: 0.6851 (moderate discriminatory power for “Poor”).

Decision tree

# Load required libraries
library(rpart)
library(rpart.plot)

# Train Decision Tree for Classification
dt_model_categorical <- rpart(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "class", # Classification tree
  control = rpart.control(
    cp = 0.005,      # Smaller complexity parameter for more splits
    maxdepth = 10,   # Allow deeper trees
    minsplit = 10    # Minimum observations required to split
  )
)

# Plot the decision tree
rpart.plot(
  dt_model_categorical,
  type = 3,             # Show splits and probabilities
  extra = 101,          # Display n, % observations, and class probabilities
  under = TRUE,         # Show text under the nodes
  fallen.leaves = TRUE, # Spread the leaves horizontally
  box.palette = "Blues" # Color scheme for the boxes
)

# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = IMDB_Category ~ Log_production_budget_adj + PG.13 + 
##     R + PG + G + between_90_to_135 + Greater_than_135 + Spring + 
##     Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, 
##     method = "class", control = rpart.control(cp = 0.005, maxdepth = 10, 
##         minsplit = 10))
##   n= 828 
## 
##            CP nsplit rel error    xerror       xstd
## 1 0.035947712      0 1.0000000 1.0000000 0.03115959
## 2 0.021786492      2 0.9281046 0.9542484 0.03129259
## 3 0.008714597      5 0.8605664 0.9215686 0.03133790
## 4 0.006535948      6 0.8518519 0.9084967 0.03134448
## 5 0.005446623      7 0.8453159 0.9389978 0.03131887
## 6 0.005083515     13 0.8082789 0.9302832 0.03132985
## 7 0.005000000     19 0.7777778 0.9346405 0.03132472
## 
## Variable importance
## Log_production_budget_adj                Main_Drama               genre_count 
##                        27                        19                        11 
##         between_90_to_135                         R          Greater_than_135 
##                         8                         7                         6 
##                     PG.13               Main_Horror                      Fall 
##                         5                         4                         3 
##                    Summer                Main_Crime                    Spring 
##                         3                         2                         2 
##                        PG             Main_Thriller 
##                         1                         1 
## 
## Node number 1: 828 observations,    complexity param=0.03594771
##   predicted class=Good       expected loss=0.5543478  P(node) =1
##     class counts:   199   369   260
##    probabilities: 0.240 0.446 0.314 
##   left son=2 (221 obs) right son=3 (607 obs)
##   Primary splits:
##       Main_Drama       < 0.5      to the right, improve=16.610020, (0 missing)
##       Greater_than_135 < 0.5      to the right, improve= 6.255695, (0 missing)
##       Fall             < 0.5      to the right, improve= 5.406866, (0 missing)
##       Main_Horror      < 0.5      to the left,  improve= 5.275756, (0 missing)
##       R                < 0.5      to the right, improve= 3.838123, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 13.55018 to the left,  agree=0.738, adj=0.018, (0 split)
##       Greater_than_135          < 0.5      to the right, agree=0.736, adj=0.009, (0 split)
## 
## Node number 2: 221 observations,    complexity param=0.03594771
##   predicted class=Excellent  expected loss=0.5701357  P(node) =0.2669082
##     class counts:    95    83    43
##    probabilities: 0.430 0.376 0.195 
##   left son=4 (141 obs) right son=5 (80 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.21795 to the right, improve=6.904754, (0 missing)
##       genre_count               < 1.5      to the left,  improve=2.962116, (0 missing)
##       Spring                    < 0.5      to the left,  improve=2.795708, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=2.789332, (0 missing)
##       Fall                      < 0.5      to the right, improve=2.387155, (0 missing)
##   Surrogate splits:
##       Summer < 0.5      to the left,  agree=0.665, adj=0.075, (0 split)
## 
## Node number 3: 607 observations,    complexity param=0.02178649
##   predicted class=Good       expected loss=0.5288303  P(node) =0.7330918
##     class counts:   104   286   217
##    probabilities: 0.171 0.471 0.357 
##   left son=6 (289 obs) right son=7 (318 obs)
##   Primary splits:
##       R           < 0.5      to the right, improve=5.952452, (0 missing)
##       PG.13       < 0.5      to the left,  improve=4.376010, (0 missing)
##       Main_Crime  < 0.5      to the right, improve=3.811016, (0 missing)
##       genre_count < 1.5      to the right, improve=3.364676, (0 missing)
##       Main_Horror < 0.5      to the left,  improve=2.925027, (0 missing)
##   Surrogate splits:
##       PG.13                     < 0.5      to the left,  agree=0.857, adj=0.699, (0 split)
##       Log_production_budget_adj < 17.72691 to the left,  agree=0.623, adj=0.208, (0 split)
##       PG                        < 0.5      to the left,  agree=0.608, adj=0.176, (0 split)
##       Main_Crime                < 0.5      to the right, agree=0.568, adj=0.093, (0 split)
##       Main_Horror               < 0.5      to the right, agree=0.554, adj=0.062, (0 split)
## 
## Node number 4: 141 observations,    complexity param=0.005446623
##   predicted class=Excellent  expected loss=0.4964539  P(node) =0.1702899
##     class counts:    71    38    32
##    probabilities: 0.504 0.270 0.227 
##   left son=8 (35 obs) right son=9 (106 obs)
##   Primary splits:
##       genre_count               < 1.5      to the left,  improve=4.003980, (0 missing)
##       Spring                    < 0.5      to the left,  improve=3.278014, (0 missing)
##       Fall                      < 0.5      to the right, improve=2.241058, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=1.819467, (0 missing)
##       Log_production_budget_adj < 16.30641 to the left,  improve=1.525703, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 16.38258 to the left,  agree=0.773, adj=0.086, (0 split)
## 
## Node number 5: 80 observations
##   predicted class=Good       expected loss=0.4375  P(node) =0.09661836
##     class counts:    24    45    11
##    probabilities: 0.300 0.562 0.138 
## 
## Node number 6: 289 observations,    complexity param=0.008714597
##   predicted class=Good       expected loss=0.4775087  P(node) =0.3490338
##     class counts:    59   151    79
##    probabilities: 0.204 0.522 0.273 
##   left son=12 (255 obs) right son=13 (34 obs)
##   Primary splits:
##       genre_count               < 1.5      to the right, improve=5.000923, (0 missing)
##       Main_Horror               < 0.5      to the left,  improve=3.495508, (0 missing)
##       Summer                    < 0.5      to the left,  improve=1.745486, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=1.657563, (0 missing)
##       Log_production_budget_adj < 18.80864 to the right, improve=1.486978, (0 missing)
## 
## Node number 7: 318 observations,    complexity param=0.02178649
##   predicted class=Poor       expected loss=0.5660377  P(node) =0.384058
##     class counts:    45   135   138
##    probabilities: 0.142 0.425 0.434 
##   left son=14 (74 obs) right son=15 (244 obs)
##   Primary splits:
##       Fall                      < 0.5      to the right, improve=2.927095, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=2.488508, (0 missing)
##       Log_production_budget_adj < 18.96643 to the right, improve=2.470616, (0 missing)
##       Main_Animation            < 0.5      to the right, improve=2.173962, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.881858, (0 missing)
## 
## Node number 8: 35 observations
##   predicted class=Excellent  expected loss=0.2571429  P(node) =0.04227053
##     class counts:    26     5     4
##    probabilities: 0.743 0.143 0.114 
## 
## Node number 9: 106 observations,    complexity param=0.005446623
##   predicted class=Excellent  expected loss=0.5754717  P(node) =0.1280193
##     class counts:    45    33    28
##    probabilities: 0.425 0.311 0.264 
##   left son=18 (88 obs) right son=19 (18 obs)
##   Primary splits:
##       Spring                    < 0.5      to the left,  improve=2.117829, (0 missing)
##       Log_production_budget_adj < 17.01084 to the left,  improve=1.917719, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=1.539968, (0 missing)
##       Fall                      < 0.5      to the right, improve=1.296415, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=1.177982, (0 missing)
## 
## Node number 12: 255 observations,    complexity param=0.005446623
##   predicted class=Good       expected loss=0.4666667  P(node) =0.307971
##     class counts:    59   136    60
##    probabilities: 0.231 0.533 0.235 
##   left son=24 (227 obs) right son=25 (28 obs)
##   Primary splits:
##       Main_Horror               < 0.5      to the left,  improve=2.888411, (0 missing)
##       Summer                    < 0.5      to the left,  improve=1.369162, (0 missing)
##       Main_Mystery              < 0.5      to the right, improve=1.327521, (0 missing)
##       Log_production_budget_adj < 16.38519 to the left,  improve=1.134223, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=0.990274, (0 missing)
## 
## Node number 13: 34 observations
##   predicted class=Poor       expected loss=0.4411765  P(node) =0.0410628
##     class counts:     0    15    19
##    probabilities: 0.000 0.441 0.559 
## 
## Node number 14: 74 observations
##   predicted class=Good       expected loss=0.472973  P(node) =0.08937198
##     class counts:    13    39    22
##    probabilities: 0.176 0.527 0.297 
## 
## Node number 15: 244 observations,    complexity param=0.02178649
##   predicted class=Poor       expected loss=0.5245902  P(node) =0.294686
##     class counts:    32    96   116
##    probabilities: 0.131 0.393 0.475 
##   left son=30 (38 obs) right son=31 (206 obs)
##   Primary splits:
##       Log_production_budget_adj < 18.63072 to the right, improve=3.104970, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.931922, (0 missing)
##       Summer                    < 0.5      to the right, improve=1.664575, (0 missing)
##       genre_count               < 5.5      to the left,  improve=1.358275, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=1.318719, (0 missing)
## 
## Node number 18: 88 observations,    complexity param=0.005446623
##   predicted class=Excellent  expected loss=0.5340909  P(node) =0.1062802
##     class counts:    41    28    19
##    probabilities: 0.466 0.318 0.216 
##   left son=36 (36 obs) right son=37 (52 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.01084 to the left,  improve=1.993201, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=1.461364, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.288961, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=1.288961, (0 missing)
##       R                         < 0.5      to the right, improve=1.039852, (0 missing)
##   Surrogate splits:
##       Summer < 0.5      to the right, agree=0.636, adj=0.111, (0 split)
##       PG     < 0.5      to the right, agree=0.602, adj=0.028, (0 split)
## 
## Node number 19: 18 observations
##   predicted class=Poor       expected loss=0.5  P(node) =0.02173913
##     class counts:     4     5     9
##    probabilities: 0.222 0.278 0.500 
## 
## Node number 24: 227 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.4625551  P(node) =0.2741546
##     class counts:    58   122    47
##    probabilities: 0.256 0.537 0.207 
##   left son=48 (166 obs) right son=49 (61 obs)
##   Primary splits:
##       Summer                    < 0.5      to the left,  improve=1.397154, (0 missing)
##       Main_Mystery              < 0.5      to the left,  improve=1.311551, (0 missing)
##       Log_production_budget_adj < 17.81216 to the left,  improve=1.250035, (0 missing)
##       Main_Action               < 0.5      to the left,  improve=1.179562, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=0.898311, (0 missing)
##   Surrogate splits:
##       Main_Science_Fiction < 0.5      to the left,  agree=0.736, adj=0.016, (0 split)
## 
## Node number 25: 28 observations,    complexity param=0.005446623
##   predicted class=Good       expected loss=0.5  P(node) =0.03381643
##     class counts:     1    14    13
##    probabilities: 0.036 0.500 0.464 
##   left son=50 (19 obs) right son=51 (9 obs)
##   Primary splits:
##       between_90_to_135         < 0.5      to the right, improve=2.3437760, (0 missing)
##       genre_count               < 2.5      to the right, improve=1.5952380, (0 missing)
##       Log_production_budget_adj < 16.80043 to the right, improve=1.2285710, (0 missing)
##       Fall                      < 0.5      to the left,  improve=0.8964859, (0 missing)
##       Spring                    < 0.5      to the right, improve=0.6127820, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 13.88181 to the right, agree=0.75, adj=0.222, (0 split)
## 
## Node number 30: 38 observations
##   predicted class=Good       expected loss=0.4210526  P(node) =0.04589372
##     class counts:     5    22    11
##    probabilities: 0.132 0.579 0.289 
## 
## Node number 31: 206 observations,    complexity param=0.006535948
##   predicted class=Poor       expected loss=0.4902913  P(node) =0.2487923
##     class counts:    27    74   105
##    probabilities: 0.131 0.359 0.510 
##   left son=62 (5 obs) right son=63 (201 obs)
##   Primary splits:
##       Main_Crime                < 0.5      to the right, improve=1.575144, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.549419, (0 missing)
##       Log_production_budget_adj < 16.57526 to the right, improve=1.318611, (0 missing)
##       genre_count               < 5.5      to the left,  improve=1.176957, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=1.117432, (0 missing)
## 
## Node number 36: 36 observations
##   predicted class=Excellent  expected loss=0.3888889  P(node) =0.04347826
##     class counts:    22     8     6
##    probabilities: 0.611 0.222 0.167 
## 
## Node number 37: 52 observations,    complexity param=0.005446623
##   predicted class=Good       expected loss=0.6153846  P(node) =0.06280193
##     class counts:    19    20    13
##    probabilities: 0.365 0.385 0.250 
##   left son=74 (11 obs) right son=75 (41 obs)
##   Primary splits:
##       between_90_to_135         < 0.5      to the left,  improve=2.7539660, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=2.7539660, (0 missing)
##       Log_production_budget_adj < 17.1904  to the right, improve=2.3944540, (0 missing)
##       R                         < 0.5      to the right, improve=0.5796703, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=0.5000000, (0 missing)
##   Surrogate splits:
##       Greater_than_135 < 0.5      to the right, agree=1.000, adj=1.000, (0 split)
##       Summer           < 0.5      to the right, agree=0.808, adj=0.091, (0 split)
## 
## Node number 48: 166 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.5  P(node) =0.2004831
##     class counts:    47    83    36
##    probabilities: 0.283 0.500 0.217 
##   left son=96 (119 obs) right son=97 (47 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.45673 to the left,  improve=2.0249130, (0 missing)
##       Main_Mystery              < 0.5      to the left,  improve=1.5460360, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.2166270, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=0.9479144, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=0.8451745, (0 missing)
##   Surrogate splits:
##       Greater_than_135     < 0.5      to the left,  agree=0.759, adj=0.149, (0 split)
##       genre_count          < 4.5      to the left,  agree=0.735, adj=0.064, (0 split)
##       Main_Fantasy         < 0.5      to the left,  agree=0.735, adj=0.064, (0 split)
##       Main_Science_Fiction < 0.5      to the left,  agree=0.723, adj=0.021, (0 split)
## 
## Node number 49: 61 observations
##   predicted class=Good       expected loss=0.3606557  P(node) =0.0736715
##     class counts:    11    39    11
##    probabilities: 0.180 0.639 0.180 
## 
## Node number 50: 19 observations
##   predicted class=Good       expected loss=0.3684211  P(node) =0.02294686
##     class counts:     1    12     6
##    probabilities: 0.053 0.632 0.316 
## 
## Node number 51: 9 observations
##   predicted class=Poor       expected loss=0.2222222  P(node) =0.01086957
##     class counts:     0     2     7
##    probabilities: 0.000 0.222 0.778 
## 
## Node number 62: 5 observations
##   predicted class=Good       expected loss=0.2  P(node) =0.006038647
##     class counts:     0     4     1
##    probabilities: 0.000 0.800 0.200 
## 
## Node number 63: 201 observations
##   predicted class=Poor       expected loss=0.4825871  P(node) =0.2427536
##     class counts:    27    70   104
##    probabilities: 0.134 0.348 0.517 
## 
## Node number 74: 11 observations
##   predicted class=Excellent  expected loss=0.2727273  P(node) =0.01328502
##     class counts:     8     2     1
##    probabilities: 0.727 0.182 0.091 
## 
## Node number 75: 41 observations
##   predicted class=Good       expected loss=0.5609756  P(node) =0.04951691
##     class counts:    11    18    12
##    probabilities: 0.268 0.439 0.293 
## 
## Node number 96: 119 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.4453782  P(node) =0.1437198
##     class counts:    32    66    21
##    probabilities: 0.269 0.555 0.176 
##   left son=192 (51 obs) right son=193 (68 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.38258 to the left,  improve=2.3487390, (0 missing)
##       Main_Mystery              < 0.5      to the left,  improve=0.9288612, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.7656663, (0 missing)
##       Main_Comedy               < 0.5      to the right, improve=0.7382564, (0 missing)
##       Fall                      < 0.5      to the right, improve=0.6309724, (0 missing)
##   Surrogate splits:
##       between_90_to_135 < 0.5      to the left,  agree=0.639, adj=0.157, (0 split)
##       Main_Romance      < 0.5      to the right, agree=0.605, adj=0.078, (0 split)
##       Main_Comedy       < 0.5      to the right, agree=0.597, adj=0.059, (0 split)
## 
## Node number 97: 47 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.6382979  P(node) =0.05676329
##     class counts:    15    17    15
##    probabilities: 0.319 0.362 0.319 
##   left son=194 (9 obs) right son=195 (38 obs)
##   Primary splits:
##       Greater_than_135          < 0.5      to the right, improve=2.042678, (0 missing)
##       Main_Comedy               < 0.5      to the left,  improve=2.009929, (0 missing)
##       Log_production_budget_adj < 17.65504 to the left,  improve=1.368488, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=1.120030, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=1.044163, (0 missing)
##   Surrogate splits:
##       between_90_to_135         < 0.5      to the left,  agree=0.957, adj=0.778, (0 split)
##       Log_production_budget_adj < 18.42106 to the right, agree=0.851, adj=0.222, (0 split)
##       Main_Thriller             < 0.5      to the right, agree=0.851, adj=0.222, (0 split)
## 
## Node number 192: 51 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.5686275  P(node) =0.0615942
##     class counts:    19    22    10
##    probabilities: 0.373 0.431 0.196 
##   left son=384 (14 obs) right son=385 (37 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.01368 to the right, improve=2.5246420, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.1500750, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=0.5836317, (0 missing)
##       genre_count               < 3.5      to the right, improve=0.4938440, (0 missing)
##       Fall                      < 0.5      to the right, improve=0.4172967, (0 missing)
## 
## Node number 193: 68 observations
##   predicted class=Good       expected loss=0.3529412  P(node) =0.0821256
##     class counts:    13    44    11
##    probabilities: 0.191 0.647 0.162 
## 
## Node number 194: 9 observations
##   predicted class=Excellent  expected loss=0.3333333  P(node) =0.01086957
##     class counts:     6     2     1
##    probabilities: 0.667 0.222 0.111 
## 
## Node number 195: 38 observations,    complexity param=0.005083515
##   predicted class=Good       expected loss=0.6052632  P(node) =0.04589372
##     class counts:     9    15    14
##    probabilities: 0.237 0.395 0.368 
##   left son=390 (10 obs) right son=391 (28 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.65504 to the left,  improve=2.1323310, (0 missing)
##       genre_count               < 4.5      to the right, improve=1.1718270, (0 missing)
##       Main_Comedy               < 0.5      to the left,  improve=0.9953560, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=0.9061404, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=0.4847118, (0 missing)
## 
## Node number 384: 14 observations
##   predicted class=Excellent  expected loss=0.4285714  P(node) =0.01690821
##     class counts:     8     2     4
##    probabilities: 0.571 0.143 0.286 
## 
## Node number 385: 37 observations
##   predicted class=Good       expected loss=0.4594595  P(node) =0.04468599
##     class counts:    11    20     6
##    probabilities: 0.297 0.541 0.162 
## 
## Node number 390: 10 observations
##   predicted class=Excellent  expected loss=0.5  P(node) =0.01207729
##     class counts:     5     1     4
##    probabilities: 0.500 0.100 0.400 
## 
## Node number 391: 28 observations
##   predicted class=Good       expected loss=0.5  P(node) =0.03381643
##     class counts:     4    14    10
##    probabilities: 0.143 0.500 0.357

Top Important Features:

Log_production_budget_adj (27% importance). Main_Drama (19% importance). genre_count (11% importance). Other significant features include between_90_to_135, R, and Greater_than_135.

Primary Split:

The root node splits on Main_Drama, indicating its high influence in categorizing IMDb ratings.

Tree Insights:

The tree progresses with logical splits like budget size (Log_production_budget_adj) and other categorical predictors such as seasonality (Spring, Fall) and genres (Main_Horror, Main_Crime). Leaf nodes provide predictions with class probabilities, helping interpret how combinations of features lead to specific IMDb categories.

Observations from the Tree Diagram:

The tree visually reveals how combinations of predictors classify movies into Excellent, Good, or Poor.

Evaluating Decision tree

# Predict class probabilities on test data
predicted_probabilities <- predict(dt_model_categorical, newdata = test_data, type = "prob")

# Predict classes on test data
predicted_classes <- predict(dt_model_categorical, newdata = test_data, type = "class")

# Ensure true labels are factors
true_classes <- factor(test_data$IMDB_Category, levels = levels(train_data$IMDB_Category))

# Convert predicted classes to factors
predicted_classes <- factor(predicted_classes, levels = levels(train_data$IMDB_Category))

library(caret)

# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        19   29    8
##   Good             41   85   53
##   Poor             14   49   58
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4551          
##                  95% CI : (0.4025, 0.5084)
##     No Information Rate : 0.4579          
##     P-Value [Acc > NIR] : 0.5628          
##                                           
##                   Kappa : 0.1259          
##                                           
##  Mcnemar's Test P-Value : 0.2781          
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                   0.25676      0.5215      0.4874
## Specificity                   0.86879      0.5130      0.7342
## Pos Pred Value                0.33929      0.4749      0.4793
## Neg Pred Value                0.81667      0.5593      0.7404
## Prevalence                    0.20787      0.4579      0.3343
## Detection Rate                0.05337      0.2388      0.1629
## Detection Prevalence          0.15730      0.5028      0.3399
## Balanced Accuracy             0.56278      0.5172      0.6108
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
  # Create binary labels: 1 for the current class, 0 for all others
  true_binary <- ifelse(true_classes == category, 1, 0)
  
  # Get predicted probabilities for the current class
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if true_binary has fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Excellent : 0.7017203
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Good : 0.5223148
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Poor : 0.6566855
library(dplyr)

# Initialize a list to store Lift Tables for each class
lift_tables <- list()

# Loop through each class
for (category in levels(true_classes)) {
  # Combine test data with predicted probabilities
  test_data_category <- test_data %>%
    mutate(predicted_prob = predicted_probabilities[, category])
  
  # Sort by predicted probabilities for the current class
  test_data_category <- test_data_category %>%
    arrange(desc(predicted_prob))
  
  # Add deciles
  test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
  
  # Calculate Lift Table
  lift_table <- test_data_category %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(IMDB_Category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(IMDB_Category == category)
    )
  
  # Store the Lift Table
  lift_tables[[category]] <- lift_table
  
  # Plot Lift Chart for the current category
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}

Accuracy: The model achieves an overall accuracy of 45.51%, meaning it correctly classifies 45.51% of the samples. Kappa (0.1259): This metric considers the possibility of correct predictions by chance. A low kappa indicates the model’s performance is only slightly better than random guessing.

McNemar’s Test: The p-value (0.2781) suggests that the errors are not significantly imbalanced between the categories.

Per-Class Statistics:

Class: Excellent:

Sensitivity (25.68%): The model identifies only 25.68% of the true Excellent samples correctly. Specificity (86.88%): It performs well at avoiding false positives for Excellent. Balanced Accuracy (56.28%): An average of sensitivity and specificity, showing moderate performance for this class.

Class: Good:

Sensitivity (52.15%): The model identifies about half of the Good samples correctly. Specificity (51.30%): Low ability to avoid false positives for Good. Balanced Accuracy (51.72%): Slightly better than random performance.

Class: Poor:

Sensitivity (48.74%): The model identifies 48.74% of true Poor samples. Specificity (73.42%): Relatively good at avoiding false positives for Poor. Balanced Accuracy (61.08%): Decent performance for this category.

AUC (Area Under the Curve):

Excellent: AUC of 0.7017 indicates moderately good discrimination. Good: AUC of 0.5223 suggests the model struggles to differentiate Good from the other classes. Poor: AUC of 0.6567 indicates better discrimination for Poor compared to Good.


For optimizing film investment, the best model is Random Forest.

Why?

Robust Performance: Random Forest provides consistent and reliable accuracy, ensuring a balanced prediction across all IMDB categories. This minimizes risks in film investment by avoiding overfitting to specific patterns.

Feature Importance: The model naturally ranks features like Log_production_budget_adj, Main_Drama, and genre_count, which are crucial for understanding and predicting the profitability of films. This helps focus on key factors that drive returns.

Handling Complexity: Random Forest effectively handles nonlinear relationships and interactions between features like budget, genre, and seasonal trends, providing insights into complex dependencies that impact film success.

Interpretability: Unlike other models, Random Forest offers clear insights through variable importance plots and easy-to-understand predictions, enabling better decision-making for investments.

Robust to Noise: It is highly robust to outliers and noise in the data, ensuring stable predictions even with imperfect historical data, making it ideal for predicting returns in uncertain film markets.

Conclusion: Random Forest aligns with the business goal of maximizing returns by providing accurate, interpretable, and robust predictions for film investment decisions.



Critic_Score

# Categorize Log_worldwide_gross_adj into buckets
data <- data %>%
  mutate(Critic_score_category = case_when(
    Critic_score <= 38 ~ "Unpopular",
    Critic_score > 38 & Critic_score <= 67 ~ "Moderate",
    Critic_score > 67 ~ "Popular"
  ))

# # Convert to factor
data$Critic_score_category <- as.factor(data$Critic_score_category)

# Check if the transformation is correct
table(data$Critic_score_category)
## 
##  Moderate   Popular Unpopular 
##       353       429       402

Categorized Critic_score into three distinct categories: Unpopular, Moderate, and Popular, based on the following thresholds:

Unpopular: Critic scores ≤ 38 Moderate: Critic scores between 38 and 67 (exclusive of both ends) Popular: Critic scores > 67 The result is a balanced categorical distribution:

Moderate: 353 entries Popular: 429 entries Unpopular: 402 entries

This categorization transforms the continuous critic score into a factor, which is well-suited for classification models. It will enable us to predict how different factors contribute to the likelihood of a film falling into each category.

Spliting the data

# Load necessary libraries for modeling and evaluation
library(caret)
library(glmnet)         # For Ridge and LASSO regression
library(randomForest)    # For Random Forest model
library(xgboost)         # For Gradient Boosting model
library(Metrics)         # For evaluation metrics

set.seed(123)  # For reproducibility

# Split the data
train_indices <- sample(1:nrow(data), size = 0.70 * nrow(data))
train_data <- data[train_indices, ]
test_data <- data[-train_indices, ]

Multinomial logistic regression

# Load the required library
library(nnet)

# Fit the multinomial logistic regression model
multinom_model <- multinom(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
  data = train_data
)
## # weights:  84 (54 variable)
## initial  value 909.650975 
## iter  10 value 838.825698
## iter  20 value 828.388787
## iter  30 value 826.477567
## iter  40 value 825.952070
## iter  50 value 825.753253
## final  value 825.726103 
## converged
# View model summary
summary(multinom_model)
## Call:
## multinom(formula = Critic_score_category ~ Log_production_budget_adj + 
##     PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller + Main_History, 
##     data = train_data)
## 
## Coefficients:
##           (Intercept) Log_production_budget_adj      PG.13          R
## Popular     15.376932               -0.30357222 -11.367107 -11.230994
## Unpopular   -3.981568                0.09646786   4.858507   4.226229
##                   PG         G between_90_to_135 Greater_than_135     Spring
## Popular   -11.373409 -9.352309        0.08757648        0.4904924 -0.4731550
## Unpopular   4.847157 -5.849016       -0.81519948       -1.5760898 -0.3449393
##                Summer       Fall genre_count Main_Action Main_Adventure
## Popular   -0.04348603  0.1714663  0.06255986   0.5486513      0.9053951
## Unpopular -0.72742514 -0.3816076 -0.10477532  -0.7044189     -1.2277867
##           Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular         2.006018   1.0511379  0.8353981        0.4662384  1.4415091
## Unpopular      -1.815775  -0.7921482 -0.7206513      -18.7251508 -0.7117834
##           Main_Family Main_Fantasy Main_Horror Main_Mystery Main_Romance
## Popular    0.04761462     2.472294   0.4807710    0.7496022    1.0096143
## Unpopular -2.79715570     1.190501  -0.4683647    0.1500117   -0.5279382
##           Main_Science_Fiction Main_Thriller Main_History
## Popular             1.45091175     0.7238196    1.0116750
## Unpopular          -0.08134442    -0.5647925   -0.6949359
## 
## Std. Errors:
##           (Intercept) Log_production_budget_adj     PG.13         R        PG
## Popular      1.592421                0.08854112 0.4696713 0.4459946 0.5229701
## Unpopular    1.392592                0.09681059 0.4901424 0.4549889 0.5476292
##                      G between_90_to_135 Greater_than_135    Spring    Summer
## Popular   1.087108e+00         0.3452727        0.5146064 0.2814829 0.2706812
## Unpopular 7.893398e-05         0.3280348        0.5631904 0.2602138 0.2739543
##                Fall genre_count Main_Action Main_Adventure Main_Animation
## Popular   0.2627186   0.1005230     1.25815      1.3116615       1.390671
## Unpopular 0.2646064   0.1023577     0.90605      0.9815192       1.188054
##           Main_Comedy Main_Crime Main_Documentary Main_Drama Main_Family
## Popular      1.254572  1.2871189     1.522110e+00  1.2508904    1.530363
## Unpopular    0.912116  0.9663221     1.268012e-06  0.9095735    1.448535
##           Main_Fantasy Main_Horror Main_Mystery Main_Romance
## Popular       1.674269    1.294508     1.594372     1.356640
## Unpopular     1.380665    0.953787     1.231029     1.030036
##           Main_Science_Fiction Main_Thriller Main_History
## Popular               1.430128      1.284191     1.556984
## Unpopular             1.139918      0.946436     1.358771
## 
## Residual Deviance: 1651.452 
## AIC: 1759.452

Coefficients Interpretation:

Each coefficient indicates the impact of the respective predictor on the log-odds of a category compared to the reference category. For example, in the “Popular” category, Log_production_budget_adj has a coefficient of -0.3035. This means that a one-unit increase in the log of the production budget decreases the log-odds of being “Popular” compared to the reference category.

Category-Specific Coefficients:

“Popular” Category:

High positive coefficients for features like Main_Fantasy, Main_Drama, and Main_Comedy indicate these genres significantly increase the likelihood of a film being categorized as “Popular.”

“Unpopular” Category: Negative coefficients for variables like Main_Action and PG.13 suggest these attributes decrease the likelihood of a film being categorized as “Unpopular.”

Variable Significance:

Variables with larger coefficients and smaller standard errors have a more significant impact on the prediction.

Model Fit:

Residual Deviance: 1651.45, which measures the goodness of fit. Lower values indicate a better fit. AIC (Akaike Information Criterion): 1759.45, used for model comparison. Lower values are better, indicating a more parsimonious model.

Practical Insights for Film Investment:

Films with higher budgets (Log_production_budget_adj) and popular genres like Fantasy, Drama, and Comedy are more likely to achieve a “Popular” critic score. Conversely, targeting features like Action or PG-rated films might reduce the risk of being categorized as “Unpopular.”

Evaluating Multinomial logistic regression

library(pROC)
# Predict on the test data
test_data$predicted_categories <- predict(multinom_model, newdata = test_data, type = "class")

# Confusion Matrix
confusion_matrix_test <- confusionMatrix(
  data = factor(test_data$predicted_categories, levels = levels(test_data$Critic_score_category)),
  reference = factor(test_data$Critic_score_category)
)
print(confusion_matrix_test)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        19      10        13
##   Popular         37      85        41
##   Unpopular       55      25        71
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4916          
##                  95% CI : (0.4385, 0.5448)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 3.746e-08       
##                                           
##                   Kappa : 0.2296          
##                                           
##  Mcnemar's Test P-Value : 7.871e-10       
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.17117         0.7083           0.5680
## Specificity                  0.90612         0.6695           0.6537
## Pos Pred Value               0.45238         0.5215           0.4702
## Neg Pred Value               0.70701         0.8187           0.7366
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.05337         0.2388           0.1994
## Detection Prevalence         0.11798         0.4579           0.4242
## Balanced Accuracy            0.53865         0.6889           0.6108
# ROC Curve and AUC for each class
roc_list_test <- list()
auc_list_test <- list()
categories <- levels(test_data$Critic_score_category)

for (category in categories) {
  # Create binary response for "One-vs-All"
  true_binary <- ifelse(test_data$Critic_score_category == category, 1, 0)
  predicted_probs <- predict(multinom_model, newdata = test_data, type = "probs")[, category]
  
  # ROC Curve
  roc_obj_test <- roc(true_binary, predicted_probs)
  roc_list_test[[category]] <- roc_obj_test
  auc_list_test[[category]] <- auc(roc_obj_test)
  
  # Plot ROC Curve for this class
  plot(roc_obj_test, main = paste("ROC Curve for", category, "on Test Data"), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, "on Test Data:", auc_list_test[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate on Test Data: 0.5842618
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Popular on Test Data: 0.7382415
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Unpopular on Test Data: 0.6526061
# Lift Chart for each category
for (category in categories) {
  # Sort data by predicted probabilities for the chosen category
  test_data <- test_data %>%
    arrange(desc(predict(multinom_model, newdata = test_data, type = "probs")[, category]))
  
  # Add deciles for the chosen category
  test_data$decile <- ntile(predict(multinom_model, newdata = test_data, type = "probs")[, category], 10)
  
  # Calculate Lift
  lift_table_test <- test_data %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Critic_score_category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
    )
  
  # Plot Lift Chart for the current category
  plot(
    lift_table_test$decile, lift_table_test$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category, "on Test Data")
  )
  abline(0, 0.1, col = "gray", lty = 2) # Reference random line
}

Confusion Matrix Summary:

Overall Accuracy:

The accuracy of the model is 49.16%, which is slightly better than random guessing but leaves room for improvement. 95% CI: The accuracy falls between 43.85% and 54.48%.

Class-Wise Performance:

Moderate: Sensitivity (True Positive Rate): 17.12%, indicating difficulty in identifying “Moderate” films. Specificity: 90.61%, meaning it rarely misclassifies other categories as “Moderate.”

Popular: Sensitivity: 70.83%, showing strong capability in identifying “Popular” films. Specificity: 66.95%, meaning some films from other categories are misclassified as “Popular.”

Unpopular: Sensitivity: 56.80%, moderately good at identifying “Unpopular” films. Specificity: 65.37%, indicating some overlap in classification.

Kappa Statistic:

Kappa = 0.2296, indicating weak but meaningful agreement between predicted and actual labels.

McNemar’s Test:

The p-value (7.871e-10) indicates significant differences in the error rates, suggesting potential for improvement.

ROC and AUC Analysis:

Moderate: AUC: 0.584, showing weak discriminative ability for identifying “Moderate” films.

Popular: AUC: 0.738, the strongest performance among all categories, indicating good separation of “Popular” films from the others.

Unpopular: AUC: 0.653, indicating moderate performance for distinguishing “Unpopular” films.

Random Forest

# Load the required library
library(randomForest)

# Check for missing values in the target and predictor variables
sum(is.na(train_data$Critic_score_category))  # Check for missing values in the target
## [1] 0
sum(is.na(train_data$Log_production_budget_adj))  # Check in predictor variables
## [1] 0
# Remove rows with missing values in any column
train_data <- na.omit(train_data)


# Train Random Forest Model for Classification
rf_model <- randomForest(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  ntree = 500,        # Number of trees
  mtry = 5,           # Number of predictors randomly selected at each split
  importance = TRUE,  # Calculate variable importance
  proximity = TRUE    # Enable proximity matrix for better insights
)


  # View the model summary
print(rf_model)
## 
## Call:
##  randomForest(formula = Critic_score_category ~ Log_production_budget_adj +      PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 +      Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure +      Main_Animation + Main_Comedy + Main_Crime + Main_Documentary +      Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery +      Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,      data = train_data, ntree = 500, mtry = 5, importance = TRUE,      proximity = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 57.25%
## Confusion matrix:
##           Moderate Popular Unpopular class.error
## Moderate        56     101        85   0.7685950
## Popular         69     176        64   0.4304207
## Unpopular       65      90       122   0.5595668
# Feature Importance
importance(rf_model)
##                             Moderate      Popular   Unpopular
## Log_production_budget_adj -5.7216051 19.191744646  1.04296015
## PG.13                     -2.4280856  5.483846357  5.43853037
## R                         -0.1554883  6.342049162  3.71559750
## PG                         4.3793232  6.059994439 -0.86511967
## G                          0.3358989  8.898097523  4.17515058
## between_90_to_135         -3.0344863  0.650684465  5.51323410
## Greater_than_135          -1.1905628  5.135357005 -2.09562138
## Spring                    -0.2587980  7.054468992 -6.81666118
## Summer                    -3.8201685  5.163605430 -2.73191887
## Fall                       0.8113437  6.323699763 -0.58809841
## genre_count               -2.5320324  1.592881645  5.79971892
## Main_Action                3.8509152 11.280229112 -5.55986056
## Main_Adventure             1.5535627 -0.005996815 -5.63286077
## Main_Animation            -1.0537341  5.088593976 -5.66106716
## Main_Comedy               -5.0483588  5.030949874  2.42392026
## Main_Crime                 0.5242691 -0.207408148  3.99398809
## Main_Documentary          -2.9378110 -1.159651474  4.43153201
## Main_Drama                -1.4850638 19.360889563 -1.19448099
## Main_Family                8.2925192 -0.014756823  0.59981493
## Main_Fantasy               0.4471958 -1.547376773  1.51037568
## Main_Horror               -2.2563048 -0.344036815  0.59036403
## Main_Mystery              -4.3008429 -1.128812528  0.99827230
## Main_History              -4.6698961 -0.793959306 -2.77260201
## Main_Romance              -4.7237143 -3.585018183 -0.04714954
## Main_Science_Fiction      -3.2486970 -1.826148733  0.32346580
## Main_Thriller              0.3760518  2.229674132  1.40716837
##                           MeanDecreaseAccuracy MeanDecreaseGini
## Log_production_budget_adj           10.5565879        84.000810
## PG.13                                6.5230852         8.884131
## R                                    7.7779231         8.574279
## PG                                   6.7709808         5.837082
## G                                    8.3514943         1.868033
## between_90_to_135                    2.6750718        11.277900
## Greater_than_135                     2.0715106         5.155201
## Spring                               0.3991179         9.269284
## Summer                              -0.4552907         9.877839
## Fall                                 4.9532904        10.540366
## genre_count                          3.3383661        30.148347
## Main_Action                          5.5471946         8.349551
## Main_Adventure                      -2.7299650         4.293407
## Main_Animation                       0.9181277         2.667557
## Main_Comedy                          2.0898983         8.707040
## Main_Crime                           2.4942552         5.638045
## Main_Documentary                    -0.8891891         1.530377
## Main_Drama                          13.8847560        11.097511
## Main_Family                          6.5400679         2.116554
## Main_Fantasy                         0.5363267         2.907561
## Main_Horror                         -1.0042383         6.410964
## Main_Mystery                        -2.2299752         2.290019
## Main_History                        -4.6556230         1.552223
## Main_Romance                        -4.7681010         3.518845
## Main_Science_Fiction                -2.3797897         3.338094
## Main_Thriller                        2.4180128         6.305447
varImpPlot(rf_model) # Plot variable importance

Overall Performance:

Out-of-Bag (OOB) Error Rate: 57.25%. This means that approximately 57% of predictions on unseen training data are incorrect, indicating room for improvement or possible overfitting issues.

Class-specific Errors:

Moderate: 76.86% class error—this category is poorly predicted. Popular: 43.04% class error—the model performs relatively well here. Unpopular: 55.96% class error—moderate performance for this category.

Variable Importance:

The MeanDecreaseAccuracy and MeanDecreaseGini metrics indicate which variables are most influential:

Top Features by Importance:

Log_production_budget_adj: Strongest predictor across all categories. genre_count: Plays a significant role in prediction accuracy. Main_Drama: A key genre predictor for categorization. Fall and between_90_to_135 (runtime range): These provide important seasonal and runtime-related context.

Least Important Features:

Genres like Main_History, Main_Documentary, and Main_Romance have minimal impact on predictions.

Insights from Variable Importance Plot:

The importance plot shows that the budget and genres are crucial in determining whether a film is categorized as “Popular,” “Moderate,” or “Unpopular.” Seasonal variables (e.g., Fall, Summer) also contribute to some extent.

Key Observations:

The model struggles to accurately classify the “Moderate” category, likely due to overlaps in features across categories or insufficient training data specific to this group. Popular films are predicted with the highest accuracy, aligning with business goals that focus on recognizing high-performing films.

Evaluating Random Forest

# Predict class probabilities on test data
predicted_probabilities <- predict(rf_model, newdata = test_data, type = "prob")

# Predict classes on test data
predicted_classes <- predict(rf_model, newdata = test_data, type = "class")

# Ensure true labels are factors with the correct levels
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))

# Convert predicted classes to factors with the same levels
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))

library(caret)

# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        24      21        20
##   Popular         45      78        40
##   Unpopular       42      21        65
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4691          
##                  95% CI : (0.4163, 0.5224)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 3.058e-06       
##                                           
##                   Kappa : 0.1986          
##                                           
##  Mcnemar's Test P-Value : 5.253e-05       
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.21622         0.6500           0.5200
## Specificity                  0.83265         0.6398           0.7273
## Pos Pred Value               0.36923         0.4785           0.5078
## Neg Pred Value               0.70103         0.7824           0.7368
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.06742         0.2191           0.1826
## Detection Prevalence         0.18258         0.4579           0.3596
## Balanced Accuracy            0.52443         0.6449           0.6236
# Extract key metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]

# Print metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4691011
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
##  0.2162162 0.65 0.52
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
##  0.8326531 0.6398305 0.7272727
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
  # Create binary labels: 1 for current class, 0 for all others
  true_binary <- ifelse(true_classes == category, 1, 0)
  
  # Get predicted probabilities for the current class
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if binary labels have fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5480419
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Popular : 0.7070445
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Unpopular : 0.6922771
library(dplyr)

# Initialize a list to store Lift Tables for each class
lift_tables <- list()



# Loop through each class
for (category in levels(true_classes)) {
  # Add predicted probabilities for sorting
  test_data_category <- test_data %>%
    mutate(predicted_prob = predicted_probabilities[, category])
  
  # Sort by predicted probabilities for the current class
  test_data_category <- test_data_category %>%
    arrange(desc(predicted_prob))
  
  # Add deciles
  test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
  
  # Calculate Lift Table
  lift_table <- test_data_category %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Critic_score_category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
    )
  
  # Store the Lift Table
  lift_tables[[category]] <- lift_table
  
  # Plot Lift Chart for the current category
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}

Overall Accuracy: The model achieved an accuracy of 46.91%, which indicates moderate performance in predicting critic score categories.

Sensitivity: The ability to correctly identify each category varied:

Moderate: 21.62% sensitivity, indicating poor identification of this class. Popular: 65.00% sensitivity, showing the model performs best for this category. Unpopular: 52.00% sensitivity, showing moderate ability to identify this class.

Specificity: The ability to correctly exclude non-members of each class:

Moderate: 83.27%, indicating good performance in ruling out instances that are not Moderate. Popular: 63.98%, indicating moderate performance in excluding non-Popular categories. Unpopular: 72.73%, showing fair ability to exclude non-Unpopular instances.

AUC (Area Under Curve): Measures the model’s overall performance for each class:

Moderate: AUC = 0.548, indicating poor separation of Moderate from other categories. Popular: AUC = 0.707, showing fair separation for this class. Unpopular: AUC = 0.692, reflecting moderate performance for this category.

Conclusion:

The Random Forest model performs best for the “Popular” category, showing relatively high sensitivity and AUC. However, it struggles with the “Moderate” category, suggesting limitations in distinguishing this class effectively. The overall performance suggests the need for improvement, particularly for underperforming categories like “Moderate.”

XGBoost

# Load necessary libraries
library(xgboost)

# Prepare data for XGBoost
x_train <- model.matrix(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data
)[, -1] # Remove intercept column

# Encode the target variable as numeric (0-based for XGBoost)
y_train <- as.numeric(train_data$Critic_score_category) - 1

# Train the XGBoost model for multi-class classification
xgb_model <- xgboost(
  data = as.matrix(x_train),
  label = y_train,
  objective = "multi:softprob",  # Multi-class classification
  num_class = length(levels(train_data$Critic_score_category)), # Number of classes
  nrounds = 100,                 # Number of boosting rounds
  max_depth = 6,                 # Tree depth
  eta = 0.1,                     # Learning rate
  colsample_bytree = 0.8,        # Subsample ratio of columns
  verbose = 1                    # Print training progress
)
## [1]  train-mlogloss:1.077933 
## [2]  train-mlogloss:1.057671 
## [3]  train-mlogloss:1.041463 
## [4]  train-mlogloss:1.024305 
## [5]  train-mlogloss:1.008889 
## [6]  train-mlogloss:0.994372 
## [7]  train-mlogloss:0.981503 
## [8]  train-mlogloss:0.964888 
## [9]  train-mlogloss:0.950778 
## [10] train-mlogloss:0.936668 
## [11] train-mlogloss:0.924759 
## [12] train-mlogloss:0.914335 
## [13] train-mlogloss:0.904313 
## [14] train-mlogloss:0.896460 
## [15] train-mlogloss:0.888847 
## [16] train-mlogloss:0.880893 
## [17] train-mlogloss:0.873080 
## [18] train-mlogloss:0.864814 
## [19] train-mlogloss:0.858445 
## [20] train-mlogloss:0.850213 
## [21] train-mlogloss:0.843088 
## [22] train-mlogloss:0.837330 
## [23] train-mlogloss:0.831897 
## [24] train-mlogloss:0.825611 
## [25] train-mlogloss:0.819434 
## [26] train-mlogloss:0.813625 
## [27] train-mlogloss:0.808139 
## [28] train-mlogloss:0.802633 
## [29] train-mlogloss:0.797602 
## [30] train-mlogloss:0.791358 
## [31] train-mlogloss:0.787822 
## [32] train-mlogloss:0.784107 
## [33] train-mlogloss:0.778533 
## [34] train-mlogloss:0.773868 
## [35] train-mlogloss:0.770273 
## [36] train-mlogloss:0.766824 
## [37] train-mlogloss:0.762580 
## [38] train-mlogloss:0.758046 
## [39] train-mlogloss:0.754330 
## [40] train-mlogloss:0.748136 
## [41] train-mlogloss:0.745279 
## [42] train-mlogloss:0.740483 
## [43] train-mlogloss:0.737294 
## [44] train-mlogloss:0.733100 
## [45] train-mlogloss:0.729472 
## [46] train-mlogloss:0.726729 
## [47] train-mlogloss:0.723034 
## [48] train-mlogloss:0.720372 
## [49] train-mlogloss:0.715739 
## [50] train-mlogloss:0.710446 
## [51] train-mlogloss:0.707980 
## [52] train-mlogloss:0.704872 
## [53] train-mlogloss:0.701038 
## [54] train-mlogloss:0.697456 
## [55] train-mlogloss:0.695029 
## [56] train-mlogloss:0.691656 
## [57] train-mlogloss:0.688859 
## [58] train-mlogloss:0.687201 
## [59] train-mlogloss:0.685493 
## [60] train-mlogloss:0.682568 
## [61] train-mlogloss:0.679697 
## [62] train-mlogloss:0.676682 
## [63] train-mlogloss:0.675025 
## [64] train-mlogloss:0.671411 
## [65] train-mlogloss:0.666427 
## [66] train-mlogloss:0.663687 
## [67] train-mlogloss:0.659878 
## [68] train-mlogloss:0.657967 
## [69] train-mlogloss:0.653613 
## [70] train-mlogloss:0.652012 
## [71] train-mlogloss:0.649098 
## [72] train-mlogloss:0.646029 
## [73] train-mlogloss:0.643313 
## [74] train-mlogloss:0.641802 
## [75] train-mlogloss:0.639229 
## [76] train-mlogloss:0.636658 
## [77] train-mlogloss:0.634901 
## [78] train-mlogloss:0.633321 
## [79] train-mlogloss:0.630792 
## [80] train-mlogloss:0.629021 
## [81] train-mlogloss:0.625248 
## [82] train-mlogloss:0.622885 
## [83] train-mlogloss:0.621223 
## [84] train-mlogloss:0.618799 
## [85] train-mlogloss:0.615751 
## [86] train-mlogloss:0.613250 
## [87] train-mlogloss:0.610869 
## [88] train-mlogloss:0.608188 
## [89] train-mlogloss:0.606144 
## [90] train-mlogloss:0.603877 
## [91] train-mlogloss:0.601677 
## [92] train-mlogloss:0.599473 
## [93] train-mlogloss:0.597914 
## [94] train-mlogloss:0.595470 
## [95] train-mlogloss:0.593429 
## [96] train-mlogloss:0.591563 
## [97] train-mlogloss:0.590139 
## [98] train-mlogloss:0.588971 
## [99] train-mlogloss:0.586972 
## [100]    train-mlogloss:0.584293
# Feature importance
importance <- xgb.importance(feature_names = colnames(x_train), model = xgb_model)
print(importance)
##                       Feature        Gain       Cover   Frequency
##  1: Log_production_budget_adj 0.470892612 0.467577316 0.426777006
##  2:               genre_count 0.105903098 0.081240562 0.117334121
##  3:                Main_Drama 0.041003417 0.032683993 0.031180730
##  4:         between_90_to_135 0.037253539 0.031358938 0.045367223
##  5:                     PG.13 0.036774978 0.019829630 0.035022905
##  6:                    Spring 0.036173097 0.019596970 0.040195064
##  7:                      Fall 0.033508871 0.013954377 0.035318457
##  8:                    Summer 0.031427423 0.035562043 0.043298360
##  9:                         R 0.028930734 0.019777284 0.029555194
## 10:               Main_Comedy 0.028573480 0.014610605 0.030441850
## 11:                        PG 0.026235070 0.027202056 0.025565243
## 12:          Greater_than_135 0.018806232 0.022655282 0.015664253
## 13:               Main_Action 0.015085692 0.017014680 0.016846461
## 14:             Main_Thriller 0.013917641 0.008931984 0.011230974
## 15:               Main_Horror 0.013452731 0.010433989 0.014482045
## 16:                Main_Crime 0.011096380 0.008038030 0.010344318
## 17:            Main_Animation 0.008857662 0.023862927 0.011969854
## 18:                         G 0.008538327 0.029323989 0.008718782
## 19:          Main_Documentary 0.005841090 0.025664225 0.009309886
## 20:              Main_Fantasy 0.005778447 0.022974971 0.008275454
## 21:               Main_Family 0.005533681 0.022995320 0.007388799
## 22:            Main_Adventure 0.004910549 0.014002714 0.008571006
## 23:      Main_Science_Fiction 0.004666787 0.006675019 0.005172159
## 24:              Main_Mystery 0.002403996 0.013936794 0.005172159
## 25:              Main_Romance 0.002317772 0.004623207 0.003398847
## 26:              Main_History 0.002116693 0.005473093 0.003398847
##                       Feature        Gain       Cover   Frequency
xgb.plot.importance(importance)

Feature Importance:

The most important feature is Log_production_budget_adj, contributing significantly to the model with a high Gain and Cover. Other top features include genre_count, Main_Drama, and between_90_to_135, indicating their relevance in predicting critic scores.

Model Training Details:

The XGBoost model is trained with multi-class classification (multi:softprob), effectively handling three critic score categories. Parameters like max_depth = 6, eta = 0.1, and colsample_bytree = 0.8 optimize the balance between overfitting and learning efficiency.

Observations from Results:

The Gain column measures the average gain of splits using a feature. The Frequency column reveals how often a feature was used for splits; for instance, Log_production_budget_adj has the highest usage frequency, reinforcing its importance.

Evaluating the XGBoost

# Prepare test data
x_test <- model.matrix(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = test_data
)[, -1] # Remove intercept column

# Encode the true labels for comparison
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))

# Predict class probabilities on test data
predicted_probabilities <- predict(xgb_model, as.matrix(x_test))

# Reshape predicted probabilities into a matrix (rows = instances, columns = classes)
predicted_probabilities <- matrix(
  predicted_probabilities, 
  nrow = nrow(x_test), 
  byrow = TRUE
)

# Assign column names for interpretability
colnames(predicted_probabilities) <- levels(train_data$Critic_score_category)

# Predict classes based on maximum probability
predicted_classes <- factor(
  apply(predicted_probabilities, 1, function(row) colnames(predicted_probabilities)[which.max(row)]),
  levels = levels(train_data$Critic_score_category)
)

library(caret)

# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        34      23        19
##   Popular         44      72        41
##   Unpopular       33      25        65
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4803          
##                  95% CI : (0.4274, 0.5336)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 3.692e-07       
##                                           
##                   Kappa : 0.2167          
##                                           
##  Mcnemar's Test P-Value : 0.002608        
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.30631         0.6000           0.5200
## Specificity                  0.82857         0.6398           0.7489
## Pos Pred Value               0.44737         0.4586           0.5285
## Neg Pred Value               0.72500         0.7588           0.7425
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.09551         0.2022           0.1826
## Detection Prevalence         0.21348         0.4410           0.3455
## Balanced Accuracy            0.56744         0.6199           0.6345
# Extract metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]

# Print key metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4803371
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
##  0.3063063 0.6 0.52
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
##  0.8285714 0.6398305 0.7489177
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
  # Create binary labels: 1 for the current class, 0 for all others
  true_binary <- ifelse(true_classes == category, 1, 0)
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if binary labels have fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5807685
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Popular : 0.6732521
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Unpopular : 0.6639827
library(dplyr)

# Initialize a list to store Lift Tables for each class
lift_tables <- list()

# Loop through each class
for (category in levels(true_classes)) {
  # Combine test data with predicted probabilities
  test_data_category <- test_data %>%
    mutate(predicted_prob = predicted_probabilities[, category])
  
  # Sort by predicted probabilities for the current class
  test_data_category <- test_data_category %>%
    arrange(desc(predicted_prob))
  
  # Add deciles
  test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
  
  # Calculate Lift Table
  lift_table <- test_data_category %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Critic_score_category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
    )
  
  # Store the Lift Table
  lift_tables[[category]] <- lift_table
  
  # Plot Lift Chart for the current category
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}

Accuracy: 48.03%

Indicates that approximately 48% of the predictions made by the model are correct. While not particularly high, this accuracy reflects the challenges of the multi-class nature of the problem.

Sensitivity:

Moderate: 30.63% - The ability to correctly identify instances of the “Moderate” category is relatively low. Popular: 60.00% - The model is reasonably good at identifying “Popular” categories. Unpopular: 52.00% - Performs moderately well in detecting “Unpopular” films.

Specificity:

Moderate: 82.86% - High specificity indicates the model is good at recognizing when a film is not in the “Moderate” category. Popular: 63.98% - Slightly lower specificity for “Popular” categories. Unpopular: 74.89% - The model effectively identifies non-“Unpopular” films.

AUC (Area Under the ROC Curve):

Moderate: 0.5808 - Indicates poor to moderate performance for the “Moderate” category. Popular: 0.6733 - Fairly good discrimination for “Popular” films. Unpopular: 0.6640 - Moderate ability to distinguish “Unpopular” films.

Observations:

The model performs best for the “Popular” category, both in terms of sensitivity and specificity. “Moderate” films are the hardest to classify, as evidenced by the lower sensitivity and AUC. The overall performance is acceptable but leaves room for improvement, especially in accurately identifying the “Moderate” category.

polynonial

# Fit a polynomial logistic regression model
library(nnet)

polynomial_logistic_model <- multinom(
  Critic_score_category ~ poly(Log_production_budget_adj, degree = 2) +
    PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
    Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
    Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
    Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + Main_History + 
    Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  maxit = 1000 # Increase maximum iterations if convergence is slow
)
## # weights:  87 (56 variable)
## initial  value 909.650975 
## iter  10 value 848.123498
## iter  20 value 830.462966
## iter  30 value 825.737888
## iter  40 value 825.353131
## iter  50 value 825.220748
## iter  60 value 825.158466
## iter  70 value 825.150546
## final  value 825.150110 
## converged
# View model summary
summary(polynomial_logistic_model)
## Call:
## multinom(formula = Critic_score_category ~ poly(Log_production_budget_adj, 
##     degree = 2) + PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_History + Main_History + Main_Romance + Main_Science_Fiction + 
##     Main_Thriller, data = train_data, maxit = 1000)
## 
## Coefficients:
##           (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Popular     11.565474                                   -11.094519
## Unpopular   -2.333291                                     3.613498
##           poly(Log_production_budget_adj, degree = 2)2      PG.13          R
## Popular                                      3.2002874 -12.634230 -12.483462
## Unpopular                                    0.7660846   4.843208   4.217229
##                   PG          G between_90_to_135 Greater_than_135     Spring
## Popular   -12.642653 -10.642985         0.1063207        0.4544483 -0.4819072
## Unpopular   4.833862  -7.730321        -0.8134503       -1.5856419 -0.3479529
##                Summer       Fall genre_count Main_Action Main_Adventure
## Popular   -0.06116598  0.1720304  0.05304324   0.5042617      0.8005876
## Unpopular -0.73078344 -0.3815510 -0.10637888  -0.7136454     -1.2524360
##           Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular         1.914293    1.050685  0.8261297        0.4381825  1.4331737
## Unpopular      -1.846541   -0.791375 -0.7199814      -18.0163775 -0.7116571
##           Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Popular    0.05035611     2.373883   0.4630853    0.7588602     1.055985
## Unpopular -2.79678478     1.171337  -0.4731329    0.1522330    -0.681507
##           Main_Romance Main_Science_Fiction Main_Thriller
## Popular      1.0058449            1.3798434     0.7314751
## Unpopular   -0.5297567           -0.1082246    -0.5628852
## 
## Std. Errors:
##           (Intercept) poly(Log_production_budget_adj, degree = 2)1
## Popular     1.0923972                                     3.430354
## Unpopular   0.7623691                                     3.809983
##           poly(Log_production_budget_adj, degree = 2)2     PG.13         R
## Popular                                       3.281846 0.3943373 0.4002812
## Unpopular                                     3.668616 0.2871574 0.2914631
##                  PG            G between_90_to_135 Greater_than_135    Spring
## Popular   0.4390837 1.010275e+00         0.3483848        0.5165330 0.2815779
## Unpopular 0.3560708 1.083521e-05         0.3283432        0.5638362 0.2605881
##              Summer      Fall genre_count Main_Action Main_Adventure
## Popular   0.2709708 0.2625506   0.1010428   1.2580273      1.3143030
## Unpopular 0.2750184 0.2645398   0.1025857   0.9071704      0.9861707
##           Main_Animation Main_Comedy Main_Crime Main_Documentary Main_Drama
## Popular         1.391344   1.2537677  1.2863022     1.525179e+00   1.250117
## Unpopular       1.190353   0.9123376  0.9665375     2.590817e-06   0.909805
##           Main_Family Main_Fantasy Main_Horror Main_Mystery Main_History
## Popular      1.528569     1.675151   1.2944481      1.59332     1.554693
## Unpopular    1.448801     1.383650   0.9541438      1.23109     1.359981
##           Main_Romance Main_Science_Fiction Main_Thriller
## Popular       1.357800             1.430722     1.2833793
## Unpopular     1.030348             1.141132     0.9467002
## 
## Residual Deviance: 1650.3 
## AIC: 1762.3

Model Convergence:

The model converged successfully after 70 iterations with a final value of the residual deviance of 1650.3, and the Akaike Information Criterion (AIC) value is 1762.3. Lower AIC suggests that the model balances goodness of fit and model complexity, but in comparison to simpler models, this is still relatively high.

Key Coefficients:

Features like poly(Log_production_budget_adj, degree = 2), PG.13, and G exhibit significant influence across the target categories (Popular, Unpopular, Moderate). Genre-related features (e.g., Main_Comedy, Main_Family, Main_Drama) also have notable coefficients, particularly for the “Popular” category, indicating their positive association with high critic scores.

Effect of Polynomial Transformation:

The inclusion of a second-degree polynomial transformation for Log_production_budget_adj allows the model to capture non-linear relationships between production budget and critic score categories. A large negative coefficient for the first term and a positive coefficient for the squared term suggest a U-shaped relationship.

Feature Contributions:

Main_Fantasy, Main_Drama, and genre_count show strong associations for the “Popular” category. Negative coefficients for many features under the “Unpopular” category suggest their strong divergence from films with high critic scores.

Residual Deviance:

The residual deviance (1650.3) provides a measure of how well the model fits the training data, though it is high, indicating scope for further optimization or using more flexible models.

Evaluating polynomial

# Predict class probabilities on the test data
predicted_probabilities <- predict(polynomial_logistic_model, newdata = test_data, type = "probs")

# Predict classes on the test data
predicted_classes <- predict(polynomial_logistic_model, newdata = test_data, type = "class")

# Ensure true labels are factors with the correct levels
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))

# Convert predicted classes to a factor with the same levels
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))

library(caret)

# Compute the confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        19      12        15
##   Popular         36      80        39
##   Unpopular       56      28        71
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4775          
##                  95% CI : (0.4246, 0.5308)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 6.366e-07       
##                                           
##                   Kappa : 0.2085          
##                                           
##  Mcnemar's Test P-Value : 3.638e-08       
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.17117         0.6667           0.5680
## Specificity                  0.88980         0.6822           0.6364
## Pos Pred Value               0.41304         0.5161           0.4581
## Neg Pred Value               0.70323         0.8010           0.7313
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.05337         0.2247           0.1994
## Detection Prevalence         0.12921         0.4354           0.4354
## Balanced Accuracy            0.53048         0.6744           0.6022
# Extract key metrics
accuracy <- confusion_matrix$overall["Accuracy"]
sensitivity <- confusion_matrix$byClass[, "Sensitivity"]
specificity <- confusion_matrix$byClass[, "Specificity"]

# Print metrics
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.4775281
cat("Sensitivity for each class:\n", sensitivity, "\n")
## Sensitivity for each class:
##  0.1711712 0.6666667 0.568
cat("Specificity for each class:\n", specificity, "\n")
## Specificity for each class:
##  0.8897959 0.6822034 0.6363636
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
  # Create binary labels: 1 for the current class, 0 for all others
  true_binary <- ifelse(true_classes == category, 1, 0)
  
  # Get predicted probabilities for the current class
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if binary labels have fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.5812466
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Popular : 0.7346751
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Unpopular : 0.6522251
library(dplyr)

# Initialize a list to store Lift Tables for each class
lift_tables <- list()

# Loop through each class
for (category in levels(true_classes)) {
  # Combine test data with predicted probabilities
  test_data_category <- test_data %>%
    mutate(predicted_prob = predicted_probabilities[, category])
  
  # Sort by predicted probabilities for the current class
  test_data_category <- test_data_category %>%
    arrange(desc(predicted_prob))
  
  # Add deciles
  test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
  
  # Calculate Lift Table
  lift_table <- test_data_category %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Critic_score_category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
    )
  
  # Store the Lift Table
  lift_tables[[category]] <- lift_table
  
  # Plot Lift Chart for the current category
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}

Overall Accuracy:

The model achieved an accuracy of 47.75% on the test data, slightly higher than the no-information rate (35.11%), but it indicates there is still significant room for improvement.

Sensitivity (Recall):

Moderate: 17.12% Popular: 66.67% Unpopular: 56.80% The model performs well in identifying “Popular” movies but struggles with “Moderate” and “Unpopular” categories.

Specificity:

Moderate: 88.98% Popular: 68.22% Unpopular: 63.64% High specificity for “Moderate” implies the model rarely misclassifies other categories as “Moderate.”

AUC (Area Under ROC Curve):

Moderate: 0.5812 Popular: 0.7347 Unpopular: 0.6522 The “Popular” category demonstrates the highest AUC, indicating the model has good discriminatory power for this class.

Balanced Accuracy:

Combines sensitivity and specificity, reflecting how well the model distinguishes each class: Moderate: 53.05% Popular: 67.44% Unpopular: 60.22%

Decision tree

# Load required libraries
library(rpart)
library(rpart.plot)

# Train Decision Tree for Classification
dt_model_categorical <- rpart(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "class", # Classification tree
  control = rpart.control(
    cp = 0.005,      # Smaller complexity parameter for more splits
    maxdepth = 10,   # Allow deeper trees
    minsplit = 10    # Minimum observations required to split
  )
)

# Plot the decision tree
rpart.plot(
  dt_model_categorical,
  type = 3,             # Show splits and probabilities
  extra = 101,          # Display n, % observations, and class probabilities
  under = TRUE,         # Show text under the nodes
  fallen.leaves = TRUE, # Spread the leaves horizontally
  box.palette = "Blues" # Color scheme for the boxes
)

# Print a summary of the model
summary(dt_model_categorical)
## Call:
## rpart(formula = Critic_score_category ~ Log_production_budget_adj + 
##     PG.13 + R + PG + G + between_90_to_135 + Greater_than_135 + 
##     Spring + Summer + Fall + genre_count + Main_Action + Main_Adventure + 
##     Main_Animation + Main_Comedy + Main_Crime + Main_Documentary + 
##     Main_Drama + Main_Family + Main_Fantasy + Main_Horror + Main_Mystery + 
##     Main_Romance + Main_Science_Fiction + Main_Thriller, data = train_data, 
##     method = "class", control = rpart.control(cp = 0.005, maxdepth = 10, 
##         minsplit = 10))
##   n= 828 
## 
##            CP nsplit rel error    xerror       xstd
## 1 0.096339114      0 1.0000000 1.0000000 0.02681516
## 2 0.013487476      1 0.9036609 0.9229287 0.02737776
## 3 0.010597303      4 0.8593449 0.9479769 0.02722511
## 4 0.007707129      6 0.8381503 0.9479769 0.02722511
## 5 0.006743738      8 0.8227360 0.9383430 0.02728721
## 6 0.006262042     10 0.8092486 0.9364162 0.02729912
## 7 0.005780347     14 0.7842004 0.9287091 0.02734507
## 8 0.005000000     20 0.7495183 0.9229287 0.02737776
## 
## Variable importance
## Log_production_budget_adj                     PG.13                         R 
##                        32                        14                        12 
##                Main_Drama               genre_count                        PG 
##                         8                         7                         6 
##                      Fall               Main_Comedy                    Spring 
##                         4                         3                         3 
##                         G                    Summer               Main_Horror 
##                         3                         2                         1 
##          Main_Documentary 
##                         1 
## 
## Node number 1: 828 observations,    complexity param=0.09633911
##   predicted class=Popular    expected loss=0.6268116  P(node) =1
##     class counts:   242   309   277
##    probabilities: 0.292 0.373 0.335 
##   left son=2 (325 obs) right son=3 (503 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.72627 to the left,  improve=12.969260, (0 missing)
##       Main_Drama                < 0.5      to the right, improve= 9.259724, (0 missing)
##       PG.13                     < 0.5      to the left,  improve= 8.957151, (0 missing)
##       R                         < 0.5      to the right, improve= 8.756927, (0 missing)
##       Main_Action               < 0.5      to the right, improve= 5.618857, (0 missing)
##   Surrogate splits:
##       Main_Horror      < 0.5      to the right, agree=0.636, adj=0.074, (0 split)
##       genre_count      < 1.5      to the left,  agree=0.616, adj=0.022, (0 split)
##       Main_Documentary < 0.5      to the right, agree=0.614, adj=0.015, (0 split)
##       Main_Drama       < 0.5      to the right, agree=0.609, adj=0.003, (0 split)
## 
## Node number 2: 325 observations,    complexity param=0.006743738
##   predicted class=Popular    expected loss=0.5046154  P(node) =0.3925121
##     class counts:    85   161    79
##    probabilities: 0.262 0.495 0.243 
##   left son=4 (199 obs) right son=5 (126 obs)
##   Primary splits:
##       R                         < 0.5      to the right, improve=4.805881, (0 missing)
##       Log_production_budget_adj < 14.67788 to the right, improve=4.633759, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=4.055589, (0 missing)
##       PG                        < 0.5      to the right, improve=3.914004, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=2.785738, (0 missing)
##   Surrogate splits:
##       PG.13                     < 0.5      to the left,  agree=0.942, adj=0.849, (0 split)
##       PG                        < 0.5      to the left,  agree=0.662, adj=0.127, (0 split)
##       Main_Documentary          < 0.5      to the left,  agree=0.631, adj=0.048, (0 split)
##       Log_production_budget_adj < 16.64124 to the left,  agree=0.625, adj=0.032, (0 split)
##       G                         < 0.5      to the left,  agree=0.618, adj=0.016, (0 split)
## 
## Node number 3: 503 observations,    complexity param=0.01348748
##   predicted class=Unpopular  expected loss=0.6063618  P(node) =0.6074879
##     class counts:   157   148   198
##    probabilities: 0.312 0.294 0.394 
##   left son=6 (285 obs) right son=7 (218 obs)
##   Primary splits:
##       PG.13            < 0.5      to the left,  improve=4.575871, (0 missing)
##       Greater_than_135 < 0.5      to the right, improve=3.624457, (0 missing)
##       Spring           < 0.5      to the left,  improve=3.454015, (0 missing)
##       R                < 0.5      to the right, improve=3.228630, (0 missing)
##       Fall             < 0.5      to the right, improve=3.111938, (0 missing)
##   Surrogate splits:
##       R                         < 0.5      to the right, agree=0.829, adj=0.606, (0 split)
##       PG                        < 0.5      to the right, agree=0.596, adj=0.069, (0 split)
##       Log_production_budget_adj < 18.53217 to the left,  agree=0.586, adj=0.046, (0 split)
##       Main_Action               < 0.5      to the left,  agree=0.579, adj=0.028, (0 split)
##       Main_Romance              < 0.5      to the left,  agree=0.569, adj=0.005, (0 split)
## 
## Node number 4: 199 observations
##   predicted class=Popular    expected loss=0.4271357  P(node) =0.2403382
##     class counts:    47   114    38
##    probabilities: 0.236 0.573 0.191 
## 
## Node number 5: 126 observations,    complexity param=0.006743738
##   predicted class=Popular    expected loss=0.6269841  P(node) =0.1521739
##     class counts:    38    47    41
##    probabilities: 0.302 0.373 0.325 
##   left son=10 (16 obs) right son=11 (110 obs)
##   Primary splits:
##       PG                        < 0.5      to the right, improve=2.378030, (0 missing)
##       Log_production_budget_adj < 14.55532 to the left,  improve=1.814943, (0 missing)
##       PG.13                     < 0.5      to the left,  improve=1.807345, (0 missing)
##       Summer                    < 0.5      to the right, improve=1.668089, (0 missing)
##       Main_Thriller             < 0.5      to the left,  improve=1.256322, (0 missing)
##   Surrogate splits:
##       PG.13       < 0.5      to the left,  agree=0.976, adj=0.812, (0 split)
##       Main_Family < 0.5      to the right, agree=0.889, adj=0.125, (0 split)
## 
## Node number 6: 285 observations,    complexity param=0.01348748
##   predicted class=Moderate   expected loss=0.6631579  P(node) =0.3442029
##     class counts:    96    96    93
##    probabilities: 0.337 0.337 0.326 
##   left son=12 (265 obs) right son=13 (20 obs)
##   Primary splits:
##       Log_production_budget_adj < 18.68043 to the left,  improve=3.441212, (0 missing)
##       genre_count               < 1.5      to the right, improve=2.924045, (0 missing)
##       G                         < 0.5      to the left,  improve=2.676456, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=2.362950, (0 missing)
##       Spring                    < 0.5      to the left,  improve=1.496491, (0 missing)
## 
## Node number 7: 218 observations,    complexity param=0.007707129
##   predicted class=Unpopular  expected loss=0.5183486  P(node) =0.263285
##     class counts:    61    52   105
##    probabilities: 0.280 0.239 0.482 
##   left son=14 (51 obs) right son=15 (167 obs)
##   Primary splits:
##       Fall                      < 0.5      to the right, improve=3.037609, (0 missing)
##       Log_production_budget_adj < 17.9594  to the right, improve=2.619436, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=2.382238, (0 missing)
##       genre_count               < 3.5      to the left,  improve=1.978286, (0 missing)
##       Main_Crime                < 0.5      to the right, improve=1.567336, (0 missing)
## 
## Node number 10: 16 observations
##   predicted class=Moderate   expected loss=0.4375  P(node) =0.01932367
##     class counts:     9     2     5
##    probabilities: 0.562 0.125 0.312 
## 
## Node number 11: 110 observations,    complexity param=0.005780347
##   predicted class=Popular    expected loss=0.5909091  P(node) =0.1328502
##     class counts:    29    45    36
##    probabilities: 0.264 0.409 0.327 
##   left son=22 (9 obs) right son=23 (101 obs)
##   Primary splits:
##       Log_production_budget_adj < 14.55532 to the left,  improve=1.7477950, (0 missing)
##       Summer                    < 0.5      to the right, improve=1.7295280, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=1.6392590, (0 missing)
##       Main_Thriller             < 0.5      to the left,  improve=1.3436360, (0 missing)
##       Main_Documentary          < 0.5      to the right, improve=0.7108062, (0 missing)
## 
## Node number 12: 265 observations,    complexity param=0.01348748
##   predicted class=Moderate   expected loss=0.645283  P(node) =0.3200483
##     class counts:    94    83    88
##    probabilities: 0.355 0.313 0.332 
##   left son=24 (236 obs) right son=25 (29 obs)
##   Primary splits:
##       genre_count      < 1.5      to the right, improve=2.357373, (0 missing)
##       Greater_than_135 < 0.5      to the right, improve=2.303430, (0 missing)
##       G                < 0.5      to the left,  improve=2.147660, (0 missing)
##       Main_Drama       < 0.5      to the left,  improve=1.636870, (0 missing)
##       Spring           < 0.5      to the left,  improve=1.509204, (0 missing)
## 
## Node number 13: 20 observations
##   predicted class=Popular    expected loss=0.35  P(node) =0.02415459
##     class counts:     2    13     5
##    probabilities: 0.100 0.650 0.250 
## 
## Node number 14: 51 observations,    complexity param=0.007707129
##   predicted class=Popular    expected loss=0.6078431  P(node) =0.0615942
##     class counts:    14    20    17
##    probabilities: 0.275 0.392 0.333 
##   left son=28 (21 obs) right son=29 (30 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the right, improve=1.9232490, (0 missing)
##       genre_count               < 3.5      to the left,  improve=1.8248370, (0 missing)
##       Log_production_budget_adj < 18.06355 to the left,  improve=1.2516040, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=0.6053922, (0 missing)
##       Main_Action               < 0.5      to the right, improve=0.3153515, (0 missing)
##   Surrogate splits:
##       genre_count               < 2.5      to the left,  agree=0.686, adj=0.238, (0 split)
##       Log_production_budget_adj < 17.84238 to the left,  agree=0.667, adj=0.190, (0 split)
##       Greater_than_135          < 0.5      to the right, agree=0.608, adj=0.048, (0 split)
##       Main_Action               < 0.5      to the left,  agree=0.608, adj=0.048, (0 split)
## 
## Node number 15: 167 observations
##   predicted class=Unpopular  expected loss=0.4730539  P(node) =0.2016908
##     class counts:    47    32    88
##    probabilities: 0.281 0.192 0.527 
## 
## Node number 22: 9 observations
##   predicted class=Popular    expected loss=0.3333333  P(node) =0.01086957
##     class counts:     3     6     0
##    probabilities: 0.333 0.667 0.000 
## 
## Node number 23: 101 observations,    complexity param=0.005780347
##   predicted class=Popular    expected loss=0.6138614  P(node) =0.1219807
##     class counts:    26    39    36
##    probabilities: 0.257 0.386 0.356 
##   left son=46 (20 obs) right son=47 (81 obs)
##   Primary splits:
##       Summer                    < 0.5      to the right, improve=1.2812740, (0 missing)
##       Main_Drama                < 0.5      to the right, improve=1.2598700, (0 missing)
##       Log_production_budget_adj < 14.98777 to the right, improve=1.2034760, (0 missing)
##       Main_Thriller             < 0.5      to the left,  improve=1.0795780, (0 missing)
##       Main_Documentary          < 0.5      to the right, improve=0.9192429, (0 missing)
##   Surrogate splits:
##       PG.13 < 0.5      to the left,  agree=0.812, adj=0.05, (0 split)
## 
## Node number 24: 236 observations,    complexity param=0.0105973
##   predicted class=Moderate   expected loss=0.6313559  P(node) =0.2850242
##     class counts:    87    77    72
##    probabilities: 0.369 0.326 0.305 
##   left son=48 (233 obs) right son=49 (3 obs)
##   Primary splits:
##       G                         < 0.5      to the left,  improve=2.075035, (0 missing)
##       Main_Drama                < 0.5      to the left,  improve=1.654108, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.330858, (0 missing)
##       Log_production_budget_adj < 18.58293 to the left,  improve=1.295880, (0 missing)
##       genre_count               < 2.5      to the left,  improve=1.167829, (0 missing)
## 
## Node number 25: 29 observations
##   predicted class=Unpopular  expected loss=0.4482759  P(node) =0.03502415
##     class counts:     7     6    16
##    probabilities: 0.241 0.207 0.552 
## 
## Node number 28: 21 observations
##   predicted class=Popular    expected loss=0.4285714  P(node) =0.02536232
##     class counts:     5    12     4
##    probabilities: 0.238 0.571 0.190 
## 
## Node number 29: 30 observations
##   predicted class=Unpopular  expected loss=0.5666667  P(node) =0.03623188
##     class counts:     9     8    13
##    probabilities: 0.300 0.267 0.433 
## 
## Node number 46: 20 observations
##   predicted class=Popular    expected loss=0.45  P(node) =0.02415459
##     class counts:     5    11     4
##    probabilities: 0.250 0.550 0.200 
## 
## Node number 47: 81 observations,    complexity param=0.005780347
##   predicted class=Unpopular  expected loss=0.6049383  P(node) =0.09782609
##     class counts:    21    28    32
##    probabilities: 0.259 0.346 0.395 
##   left son=94 (57 obs) right son=95 (24 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the left,  improve=2.0547430, (0 missing)
##       Log_production_budget_adj < 15.64706 to the right, improve=1.3793050, (0 missing)
##       Main_Documentary          < 0.5      to the right, improve=1.0037990, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=0.7613285, (0 missing)
##       Main_Thriller             < 0.5      to the left,  improve=0.7412346, (0 missing)
## 
## Node number 48: 233 observations,    complexity param=0.0105973
##   predicted class=Moderate   expected loss=0.6266094  P(node) =0.281401
##     class counts:    87    74    72
##    probabilities: 0.373 0.318 0.309 
##   left son=96 (188 obs) right son=97 (45 obs)
##   Primary splits:
##       Main_Drama                < 0.5      to the left,  improve=1.870095, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.409792, (0 missing)
##       Log_production_budget_adj < 18.00082 to the left,  improve=1.339468, (0 missing)
##       Spring                    < 0.5      to the left,  improve=1.245670, (0 missing)
##       genre_count               < 2.5      to the left,  improve=1.102906, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 16.77728 to the right, agree=0.815, adj=0.044, (0 split)
## 
## Node number 49: 3 observations
##   predicted class=Popular    expected loss=0  P(node) =0.003623188
##     class counts:     0     3     0
##    probabilities: 0.000 1.000 0.000 
## 
## Node number 94: 57 observations
##   predicted class=Unpopular  expected loss=0.5789474  P(node) =0.06884058
##     class counts:    18    15    24
##    probabilities: 0.316 0.263 0.421 
## 
## Node number 95: 24 observations,    complexity param=0.005780347
##   predicted class=Popular    expected loss=0.4583333  P(node) =0.02898551
##     class counts:     3    13     8
##    probabilities: 0.125 0.542 0.333 
##   left son=190 (14 obs) right son=191 (10 obs)
##   Primary splits:
##       Log_production_budget_adj < 16.11692 to the right, improve=2.2309520, (0 missing)
##       genre_count               < 2.5      to the left,  improve=0.6745614, (0 missing)
##       Spring                    < 0.5      to the right, improve=0.4055556, (0 missing)
##       Fall                      < 0.5      to the left,  improve=0.4023810, (0 missing)
##   Surrogate splits:
##       Spring      < 0.5      to the left,  agree=0.625, adj=0.1, (0 split)
##       genre_count < 2.5      to the left,  agree=0.625, adj=0.1, (0 split)
## 
## Node number 96: 188 observations,    complexity param=0.006262042
##   predicted class=Moderate   expected loss=0.606383  P(node) =0.2270531
##     class counts:    74    53    61
##    probabilities: 0.394 0.282 0.324 
##   left son=192 (48 obs) right son=193 (140 obs)
##   Primary splits:
##       genre_count               < 2.5      to the left,  improve=1.999012, (0 missing)
##       Log_production_budget_adj < 17.21913 to the left,  improve=1.761271, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.123146, (0 missing)
##       R                         < 0.5      to the right, improve=0.974878, (0 missing)
##       PG                        < 0.5      to the left,  improve=0.974878, (0 missing)
##   Surrogate splits:
##       Log_production_budget_adj < 16.82575 to the left,  agree=0.755, adj=0.042, (0 split)
##       Main_Family               < 0.5      to the right, agree=0.750, adj=0.021, (0 split)
##       Main_Horror               < 0.5      to the right, agree=0.750, adj=0.021, (0 split)
##       Main_Mystery              < 0.5      to the right, agree=0.750, adj=0.021, (0 split)
## 
## Node number 97: 45 observations
##   predicted class=Popular    expected loss=0.5333333  P(node) =0.05434783
##     class counts:    13    21    11
##    probabilities: 0.289 0.467 0.244 
## 
## Node number 190: 14 observations
##   predicted class=Popular    expected loss=0.2857143  P(node) =0.01690821
##     class counts:     2    10     2
##    probabilities: 0.143 0.714 0.143 
## 
## Node number 191: 10 observations
##   predicted class=Unpopular  expected loss=0.4  P(node) =0.01207729
##     class counts:     1     3     6
##    probabilities: 0.100 0.300 0.600 
## 
## Node number 192: 48 observations
##   predicted class=Moderate   expected loss=0.5  P(node) =0.05797101
##     class counts:    24    15     9
##    probabilities: 0.500 0.312 0.187 
## 
## Node number 193: 140 observations,    complexity param=0.006262042
##   predicted class=Unpopular  expected loss=0.6285714  P(node) =0.1690821
##     class counts:    50    38    52
##    probabilities: 0.357 0.271 0.371 
##   left son=386 (27 obs) right son=387 (113 obs)
##   Primary splits:
##       Log_production_budget_adj < 17.2662  to the left,  improve=1.0255930, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=1.0130130, (0 missing)
##       Spring                    < 0.5      to the left,  improve=0.9731251, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=0.9407563, (0 missing)
##       genre_count               < 5.5      to the left,  improve=0.6700035, (0 missing)
## 
## Node number 386: 27 observations,    complexity param=0.005780347
##   predicted class=Moderate   expected loss=0.5185185  P(node) =0.0326087
##     class counts:    13     4    10
##    probabilities: 0.481 0.148 0.370 
##   left son=772 (24 obs) right son=773 (3 obs)
##   Primary splits:
##       Spring                    < 0.5      to the left,  improve=2.1944440, (0 missing)
##       Log_production_budget_adj < 16.87818 to the left,  improve=1.4444440, (0 missing)
##       between_90_to_135         < 0.5      to the left,  improve=0.4444444, (0 missing)
##       Main_Thriller             < 0.5      to the left,  improve=0.3015873, (0 missing)
##       R                         < 0.5      to the right, improve=0.1835749, (0 missing)
## 
## Node number 387: 113 observations,    complexity param=0.006262042
##   predicted class=Unpopular  expected loss=0.6283186  P(node) =0.1364734
##     class counts:    37    34    42
##    probabilities: 0.327 0.301 0.372 
##   left son=774 (24 obs) right son=775 (89 obs)
##   Primary splits:
##       Log_production_budget_adj < 18.32308 to the right, improve=1.5011770, (0 missing)
##       Main_Fantasy              < 0.5      to the left,  improve=0.9937891, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=0.8847240, (0 missing)
##       Main_Adventure            < 0.5      to the right, improve=0.8754166, (0 missing)
##       Spring                    < 0.5      to the right, improve=0.6983541, (0 missing)
## 
## Node number 772: 24 observations
##   predicted class=Moderate   expected loss=0.4583333  P(node) =0.02898551
##     class counts:    13     4     7
##    probabilities: 0.542 0.167 0.292 
## 
## Node number 773: 3 observations
##   predicted class=Unpopular  expected loss=0  P(node) =0.003623188
##     class counts:     0     0     3
##    probabilities: 0.000 0.000 1.000 
## 
## Node number 774: 24 observations,    complexity param=0.005780347
##   predicted class=Moderate   expected loss=0.5  P(node) =0.02898551
##     class counts:    12     4     8
##    probabilities: 0.500 0.167 0.333 
##   left son=1548 (21 obs) right son=1549 (3 obs)
##   Primary splits:
##       Main_Comedy               < 0.5      to the left,  improve=2.4761900, (0 missing)
##       between_90_to_135         < 0.5      to the right, improve=1.3333330, (0 missing)
##       Log_production_budget_adj < 18.58293 to the left,  improve=1.2666670, (0 missing)
##       Greater_than_135          < 0.5      to the left,  improve=0.8666667, (0 missing)
##       Main_Action               < 0.5      to the right, improve=0.8561404, (0 missing)
## 
## Node number 775: 89 observations,    complexity param=0.006262042
##   predicted class=Unpopular  expected loss=0.6179775  P(node) =0.1074879
##     class counts:    25    30    34
##    probabilities: 0.281 0.337 0.382 
##   left son=1550 (60 obs) right son=1551 (29 obs)
##   Primary splits:
##       R                         < 0.5      to the right, improve=1.4787030, (0 missing)
##       PG                        < 0.5      to the left,  improve=1.4787030, (0 missing)
##       Log_production_budget_adj < 18.0047  to the left,  improve=1.3404570, (0 missing)
##       Greater_than_135          < 0.5      to the right, improve=0.9234633, (0 missing)
##       Main_Adventure            < 0.5      to the right, improve=0.8918366, (0 missing)
##   Surrogate splits:
##       PG                        < 0.5      to the left,  agree=1.000, adj=1.000, (0 split)
##       Main_Animation            < 0.5      to the left,  agree=0.753, adj=0.241, (0 split)
##       Main_Adventure            < 0.5      to the left,  agree=0.742, adj=0.207, (0 split)
##       Log_production_budget_adj < 17.86695 to the left,  agree=0.708, adj=0.103, (0 split)
##       between_90_to_135         < 0.5      to the right, agree=0.708, adj=0.103, (0 split)
## 
## Node number 1548: 21 observations
##   predicted class=Moderate   expected loss=0.4285714  P(node) =0.02536232
##     class counts:    12     4     5
##    probabilities: 0.571 0.190 0.238 
## 
## Node number 1549: 3 observations
##   predicted class=Unpopular  expected loss=0  P(node) =0.003623188
##     class counts:     0     0     3
##    probabilities: 0.000 0.000 1.000 
## 
## Node number 1550: 60 observations
##   predicted class=Unpopular  expected loss=0.6  P(node) =0.07246377
##     class counts:    20    16    24
##    probabilities: 0.333 0.267 0.400 
## 
## Node number 1551: 29 observations
##   predicted class=Popular    expected loss=0.5172414  P(node) =0.03502415
##     class counts:     5    14    10
##    probabilities: 0.172 0.483 0.345

Root Node: The tree starts with a split on Log_production_budget_adj. This means the production budget is the most influential feature in determining the critic score categories.

Splits: The tree branches out based on feature thresholds, such as PG.13, genre_count, Summer, and other attributes. These splits aim to separate the data into more homogeneous groups concerning the target categories (e.g., “Moderate,” “Popular,” “Unpopular”).

Leaf Nodes: At the ends of the branches, leaf nodes show the predicted category and the proportion of data points in each category (e.g., 50% Popular, 30% Unpopular). These nodes represent the final decision for a given set of feature values.

Feature Importance: Features such as Log_production_budget_adj, PG.13, and genre_count appear frequently near the top, indicating their importance in predicting critic scores.

General Structure: The tree uses thresholds to segment the data into subgroups, attempting to classify observations as accurately as possible while maintaining interpretability.

Evaluation of decision tree

# Predict class probabilities on test data
predicted_probabilities <- predict(dt_model_categorical, newdata = test_data, type = "prob")

# Predict classes on test data
predicted_classes <- predict(dt_model_categorical, newdata = test_data, type = "class")

# Ensure true labels are factors
true_classes <- factor(test_data$Critic_score_category, levels = levels(train_data$Critic_score_category))

# Convert predicted classes to factors
predicted_classes <- factor(predicted_classes, levels = levels(train_data$Critic_score_category))

library(caret)

# Compute confusion matrix
confusion_matrix <- confusionMatrix(predicted_classes, true_classes)

# Print the confusion matrix
print(confusion_matrix)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        19      17        17
##   Popular         40      66        30
##   Unpopular       52      37        78
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4579          
##                  95% CI : (0.4052, 0.5112)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 2.130e-05       
##                                           
##                   Kappa : 0.1787          
##                                           
##  Mcnemar's Test P-Value : 4.068e-06       
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.17117         0.5500           0.6240
## Specificity                  0.86122         0.7034           0.6147
## Pos Pred Value               0.35849         0.4853           0.4671
## Neg Pred Value               0.69637         0.7545           0.7513
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.05337         0.1854           0.2191
## Detection Prevalence         0.14888         0.3820           0.4691
## Balanced Accuracy            0.51620         0.6267           0.6194
library(pROC)

# Initialize lists for ROC and AUC
roc_list <- list()
auc_list <- list()

# Compute ROC and AUC for each class
for (category in levels(true_classes)) {
  # Create binary labels: 1 for the current class, 0 for all others
  true_binary <- ifelse(true_classes == category, 1, 0)
  
  # Get predicted probabilities for the current class
  predicted_probs_binary <- predicted_probabilities[, category]
  
  # Skip if true_binary has fewer than two levels
  if (length(unique(true_binary)) < 2) {
    cat("Skipping ROC for", category, "due to insufficient data.\n")
    next
  }
  
  # Compute ROC
  roc_obj <- roc(true_binary, predicted_probs_binary)
  roc_list[[category]] <- roc_obj
  auc_list[[category]] <- auc(roc_obj)
  
  # Plot ROC curve
  plot(roc_obj, main = paste("ROC Curve for", category), col = "blue")
  abline(a = 0, b = 1, col = "gray", lty = 2) # Reference line
  cat("AUC for", category, ":", auc_list[[category]], "\n")
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## AUC for Moderate : 0.50467
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Popular : 0.6490643
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## AUC for Unpopular : 0.6117056
library(dplyr)

# Initialize a list to store Lift Tables for each class
lift_tables <- list()

# Loop through each class
for (category in levels(true_classes)) {
  # Combine test data with predicted probabilities
  test_data_category <- test_data %>%
    mutate(predicted_prob = predicted_probabilities[, category])
  
  # Sort by predicted probabilities for the current class
  test_data_category <- test_data_category %>%
    arrange(desc(predicted_prob))
  
  # Add deciles
  test_data_category$decile <- ntile(test_data_category$predicted_prob, 10)
  
  # Calculate Lift Table
  lift_table <- test_data_category %>%
    group_by(decile) %>%
    summarize(
      total = n(),
      events = sum(Critic_score_category == category),
      cumulative_events = cumsum(events),
      cumulative_percentage = cumulative_events / sum(Critic_score_category == category)
    )
  
  # Store the Lift Table
  lift_tables[[category]] <- lift_table
  
  # Plot Lift Chart for the current category
  plot(
    lift_table$decile, lift_table$cumulative_percentage,
    type = "o", col = "blue", xlab = "Decile", ylab = "Cumulative Gain",
    main = paste("Lift Chart for", category)
  )
  abline(a = 0, b = 0.1, col = "gray", lty = 2) # Reference line
}

The evaluation metrics for the Decision Tree model reveal its performance for classifying the “Critic_Score_Category” target variable. Below is the summary:

Overall Accuracy:

The accuracy of the model is 45.79%, which indicates that approximately 46% of predictions match the actual classes.

Class-wise Sensitivity:

Moderate: Sensitivity is 17.12%, indicating a low ability to identify this class. Popular: Sensitivity is 55.00%, showing moderate success in identifying this class. Unpopular: Sensitivity is 62.40%, performing relatively well for this class.

Class-wise Specificity:

Moderate: Specificity is 86.12%, meaning the model is good at excluding instances that are not “Moderate.” Popular: Specificity is 70.34%, showing it is decent at excluding non-“Popular” cases. Unpopular: Specificity is 61.47%, performing adequately for this class.

AUC (Area Under the ROC Curve):

Moderate: AUC is 0.5047, indicating random performance for this class. Popular: AUC is 0.6491, indicating moderate discrimination ability. Unpopular: AUC is 0.6117, suggesting fair discrimination for this class.


For optimizing film investments based on Critic_Score_Category, the best model is XGBoost.

Why? High Accuracy: XGBoost offers the most robust predictive performance compared to other models, ensuring better categorization of films into Popular, Moderate, or Unpopular categories. Class Handling: It excels in managing imbalanced datasets, critical for avoiding missed predictions in key categories like Popular (which are vital for investment decisions). Feature Interactions: XGBoost’s ability to capture complex interactions between features like budget, genre, and seasonal release ensures nuanced predictions that align with film investment goals. Scalability: Its efficient computation allows handling large datasets, making it scalable for future predictions as more data becomes available. XGBoost is the most suitable model for your goal of maximizing returns on film investments.


  1. IMDB_Category (Best Model: Random Forest)

Verified Significant Variables:

From the Random Forest model evaluation:

Log_production_budget_adj: A dominant predictor; indicates that budget heavily influences the IMDB category. PG.13 and R: Age ratings significantly impact categorization, reflecting audience segmentation. Genre_count: Diversity in genres is an essential factor. Main_Drama: Drama as the primary genre has a consistent impact. Between_90_to_135: Movie duration in this range strongly affects categorization. Seasonal Variables (Spring, Summer, Fall): Timing of releases is important for success. These variables align with the highest MeanDecreaseAccuracy and MeanDecreaseGini metrics observed in the Random Forest model for IMDB_Category.

  1. Critic_Score_Category (Best Model: XGBoost)

Verified Significant Variables:

From the XGBoost model evaluation:

Log_production_budget_adj: The most crucial variable across all evaluation metrics, showing its significant effect on critic scores. Genre_count: Indicates how the diversity of genres impacts critics’ evaluations. Main_Drama: The drama genre is consistently favored by critics. Between_90_to_135: Movies in this duration range are favored. PG.13, R, and G: Age ratings are significant predictors, reflecting audience targeting and content alignment with critics’ preferences. Seasonal Variables (Spring, Fall, Summer): Timing strongly influences critic perceptions. These variables are supported by their high importance scores (gain, cover, and frequency) in the XGBoost model.

  1. Log_Worldwide_Gross (Best Model: XGBoost)

Verified Significant Variables:

From the XGBoost model evaluation:

Log_production_budget_adj: Consistently the most important variable, as it directly affects revenue. Main_Action, Main_Adventure, Main_Comedy: These genres dominate revenue performance. Genre_count: A diverse set of genres correlates with higher revenue. Between_90_to_135 and Greater_than_135: Longer movie durations are critical for maximizing revenue. Seasonal Variables (Spring, Summer): Timing affects box office success. PG.13 and R: Age ratings are key to targeting the right audience for maximum revenue. The feature importance scores in XGBoost strongly support these variables.


We are taking Worldwide gross = XGBoost IMDbrating = Random forest Critic score = XGBoost

library(caret)

# Define a 10-fold cross-validation
train_control <- trainControl(method = "cv", number = 10)

# XGBoost Model for Critic score 
xgb_model_Critic_score_cv <- train(
  Critic_score_category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "xgbTree",  # XGBoost model
  trControl = train_control
)
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:25:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:26:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:27:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:28:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
# Random Forest Model for IMDb Rating
rf_model_IMDB_Category_cv <- train(
  IMDB_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "rf",  # Random Forest method
  trControl = train_control,
  tuneGrid = expand.grid(
    mtry = c(2, 4, 6, 8)  # Number of features randomly selected at each split
  )
)



# XGBoost Model for Worldwide Gross Adj Category 
xgb_model_Gross_Category_cv <- train(
  Log_Worldwide_Gross_Category ~ Log_production_budget_adj + PG.13 + R + PG + G + 
    between_90_to_135 + Greater_than_135 + Spring + Summer + Fall + genre_count + 
    Main_Action + Main_Adventure + Main_Animation + Main_Comedy + Main_Crime + 
    Main_Documentary + Main_Drama + Main_Family + Main_Fantasy + Main_Horror + 
    Main_Mystery + Main_History + Main_Romance + Main_Science_Fiction + Main_Thriller,
  data = train_data,
  method = "xgbTree",  # XGBoost model
  trControl = train_control
)
## [19:29:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:29:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:30:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:31:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:37] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:38] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:39] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:40] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:41] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:42] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:43] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:44] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:45] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:46] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:47] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:48] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:49] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:50] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:51] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:52] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:53] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:54] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:55] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:56] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:57] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:58] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:32:59] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:00] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:01] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:02] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:03] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:04] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:05] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:06] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:07] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:08] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:09] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:10] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:11] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:12] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:13] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:14] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:15] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:16] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:17] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:18] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:19] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:20] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:21] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:22] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:23] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:24] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:25] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:26] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:27] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:28] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:29] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:30] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:31] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:32] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:33] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:34] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:35] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
## [19:33:36] WARNING: src/c_api/c_api.cc:935: `ntree_limit` is deprecated, use `iteration_range` instead.
# Check the results of cross-validation
print(xgb_model_Critic_score_cv)
## eXtreme Gradient Boosting 
## 
## 828 samples
##  26 predictor
##   3 classes: 'Moderate', 'Popular', 'Unpopular' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 745, 745, 746, 744, 744, 745, ... 
## Resampling results across tuning parameters:
## 
##   eta  max_depth  colsample_bytree  subsample  nrounds  Accuracy   Kappa    
##   0.3  1          0.6               0.50        50      0.4382869  0.1400253
##   0.3  1          0.6               0.50       100      0.4275013  0.1280986
##   0.3  1          0.6               0.50       150      0.4178473  0.1131868
##   0.3  1          0.6               0.75        50      0.4335978  0.1335159
##   0.3  1          0.6               0.75       100      0.4407984  0.1453639
##   0.3  1          0.6               0.75       150      0.4287215  0.1287708
##   0.3  1          0.6               1.00        50      0.4347890  0.1346220
##   0.3  1          0.6               1.00       100      0.4299697  0.1289202
##   0.3  1          0.6               1.00       150      0.4324091  0.1334236
##   0.3  1          0.8               0.50        50      0.4372280  0.1389427
##   0.3  1          0.8               0.50       100      0.4346718  0.1376657
##   0.3  1          0.8               0.50       150      0.4312186  0.1329044
##   0.3  1          0.8               0.75        50      0.4431790  0.1484338
##   0.3  1          0.8               0.75       100      0.4348621  0.1380564
##   0.3  1          0.8               0.75       150      0.4408285  0.1476388
##   0.3  1          0.8               1.00        50      0.4360085  0.1355260
##   0.3  1          0.8               1.00       100      0.4275604  0.1247945
##   0.3  1          0.8               1.00       150      0.4384919  0.1423959
##   0.3  2          0.6               0.50        50      0.4454882  0.1553027
##   0.3  2          0.6               0.50       100      0.4298403  0.1351383
##   0.3  2          0.6               0.50       150      0.4407861  0.1518995
##   0.3  2          0.6               0.75        50      0.4334106  0.1364740
##   0.3  2          0.6               0.75       100      0.4334537  0.1381613
##   0.3  2          0.6               0.75       150      0.4335117  0.1400984
##   0.3  2          0.6               1.00        50      0.4371993  0.1420571
##   0.3  2          0.6               1.00       100      0.4335261  0.1403270
##   0.3  2          0.6               1.00       150      0.4443716  0.1579033
##   0.3  2          0.8               0.50        50      0.4455033  0.1547767
##   0.3  2          0.8               0.50       100      0.4588021  0.1766237
##   0.3  2          0.8               0.50       150      0.4322646  0.1368073
##   0.3  2          0.8               0.75        50      0.4335124  0.1376986
##   0.3  2          0.8               0.75       100      0.4238725  0.1248068
##   0.3  2          0.8               0.75       150      0.4408439  0.1520686
##   0.3  2          0.8               1.00        50      0.4479997  0.1578043
##   0.3  2          0.8               1.00       100      0.4346725  0.1414842
##   0.3  2          0.8               1.00       150      0.4504674  0.1658787
##   0.3  3          0.6               0.50        50      0.4433123  0.1541815
##   0.3  3          0.6               0.50       100      0.4444590  0.1568026
##   0.3  3          0.6               0.50       150      0.4359662  0.1461649
##   0.3  3          0.6               0.75        50      0.4588591  0.1768158
##   0.3  3          0.6               0.75       100      0.4456198  0.1600976
##   0.3  3          0.6               0.75       150      0.4396111  0.1511199
##   0.3  3          0.6               1.00        50      0.4359508  0.1427112
##   0.3  3          0.6               1.00       100      0.4385059  0.1480578
##   0.3  3          0.6               1.00       150      0.4372284  0.1477963
##   0.3  3          0.8               0.50        50      0.4201709  0.1209373
##   0.3  3          0.8               0.50       100      0.4311332  0.1382388
##   0.3  3          0.8               0.50       150      0.4360246  0.1468356
##   0.3  3          0.8               0.75        50      0.4383618  0.1465379
##   0.3  3          0.8               0.75       100      0.4421082  0.1543454
##   0.3  3          0.8               0.75       150      0.4457667  0.1613499
##   0.3  3          0.8               1.00        50      0.4468239  0.1586677
##   0.3  3          0.8               1.00       100      0.4456044  0.1597067
##   0.3  3          0.8               1.00       150      0.4384181  0.1505586
##   0.4  1          0.6               0.50        50      0.4383755  0.1408434
##   0.4  1          0.6               0.50       100      0.4360963  0.1408669
##   0.4  1          0.6               0.50       150      0.4311161  0.1342784
##   0.4  1          0.6               0.75        50      0.4468529  0.1557941
##   0.4  1          0.6               0.75       100      0.4334239  0.1354584
##   0.4  1          0.6               0.75       150      0.4286914  0.1292268
##   0.4  1          0.6               1.00        50      0.4323650  0.1316415
##   0.4  1          0.6               1.00       100      0.4372287  0.1408279
##   0.4  1          0.6               1.00       150      0.4348334  0.1382558
##   0.4  1          0.8               0.50        50      0.4648398  0.1825249
##   0.4  1          0.8               0.50       100      0.4468680  0.1553975
##   0.4  1          0.8               0.50       150      0.4442838  0.1538679
##   0.4  1          0.8               0.75        50      0.4396376  0.1434181
##   0.4  1          0.8               0.75       100      0.4250483  0.1243819
##   0.4  1          0.8               0.75       150      0.4298969  0.1304517
##   0.4  1          0.8               1.00        50      0.4384041  0.1404161
##   0.4  1          0.8               1.00       100      0.4409310  0.1462060
##   0.4  1          0.8               1.00       150      0.4312476  0.1325871
##   0.4  2          0.6               0.50        50      0.4165859  0.1130498
##   0.4  2          0.6               0.50       100      0.4321922  0.1399234
##   0.4  2          0.6               0.50       150      0.4359368  0.1449768
##   0.4  2          0.6               0.75        50      0.4430929  0.1529243
##   0.4  2          0.6               0.75       100      0.4382880  0.1485575
##   0.4  2          0.6               0.75       150      0.4358486  0.1448665
##   0.4  2          0.6               1.00        50      0.4514840  0.1647786
##   0.4  2          0.6               1.00       100      0.4382152  0.1469995
##   0.4  2          0.6               1.00       150      0.4394788  0.1502305
##   0.4  2          0.8               0.50        50      0.4432108  0.1544161
##   0.4  2          0.8               0.50       100      0.4322786  0.1402814
##   0.4  2          0.8               0.50       150      0.4444447  0.1590664
##   0.4  2          0.8               0.75        50      0.4358056  0.1413575
##   0.4  2          0.8               0.75       100      0.4478842  0.1621025
##   0.4  2          0.8               0.75       150      0.4479573  0.1637495
##   0.4  2          0.8               1.00        50      0.4382583  0.1456164
##   0.4  2          0.8               1.00       100      0.4528053  0.1706389
##   0.4  2          0.8               1.00       150      0.4516439  0.1684467
##   0.4  3          0.6               0.50        50      0.4285770  0.1335066
##   0.4  3          0.6               0.50       100      0.4467816  0.1639615
##   0.4  3          0.6               0.50       150      0.4372291  0.1497626
##   0.4  3          0.6               0.75        50      0.4299715  0.1353763
##   0.4  3          0.6               0.75       100      0.4274884  0.1320303
##   0.4  3          0.6               0.75       150      0.4202584  0.1240469
##   0.4  3          0.6               1.00        50      0.4395232  0.1485848
##   0.4  3          0.6               1.00       100      0.4444587  0.1577159
##   0.4  3          0.6               1.00       150      0.4373172  0.1483019
##   0.4  3          0.8               0.50        50      0.4420921  0.1536377
##   0.4  3          0.8               0.50       100      0.4409460  0.1550329
##   0.4  3          0.8               0.50       150      0.4336580  0.1445654
##   0.4  3          0.8               0.75        50      0.4348331  0.1439464
##   0.4  3          0.8               0.75       100      0.4482764  0.1651645
##   0.4  3          0.8               0.75       150      0.4312190  0.1407432
##   0.4  3          0.8               1.00        50      0.4299847  0.1347275
##   0.4  3          0.8               1.00       100      0.4576694  0.1788384
##   0.4  3          0.8               1.00       150      0.4468683  0.1632630
## 
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning
##  parameter 'min_child_weight' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were nrounds = 50, max_depth = 1, eta
##  = 0.4, gamma = 0, colsample_bytree = 0.8, min_child_weight = 1 and subsample
##  = 0.5.
print(xgb_model_Gross_Category_cv)
## eXtreme Gradient Boosting 
## 
## 828 samples
##  26 predictor
##   3 classes: 'High's', 'Low's', 'Medium' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 746, 745, 744, 745, 745, 746, ... 
## Resampling results across tuning parameters:
## 
##   eta  max_depth  colsample_bytree  subsample  nrounds  Accuracy   Kappa    
##   0.3  1          0.6               0.50        50      0.6232459  0.4345190
##   0.3  1          0.6               0.50       100      0.6171480  0.4253202
##   0.3  1          0.6               0.50       150      0.6219824  0.4326543
##   0.3  1          0.6               0.75        50      0.6316800  0.4471956
##   0.3  1          0.6               0.75       100      0.6207485  0.4307392
##   0.3  1          0.6               0.75       150      0.6195297  0.4290073
##   0.3  1          0.6               1.00        50      0.6280362  0.4416780
##   0.3  1          0.6               1.00       100      0.6184554  0.4273351
##   0.3  1          0.6               1.00       150      0.6196315  0.4291602
##   0.3  1          0.8               0.50        50      0.6267733  0.4398282
##   0.3  1          0.8               0.50       100      0.6255828  0.4380131
##   0.3  1          0.8               0.50       150      0.6194562  0.4288990
##   0.3  1          0.8               0.75        50      0.6327834  0.4488104
##   0.3  1          0.8               0.75       100      0.6182664  0.4270410
##   0.3  1          0.8               0.75       150      0.6170753  0.4252832
##   0.3  1          0.8               1.00        50      0.6316510  0.4471600
##   0.3  1          0.8               1.00       100      0.6196458  0.4291615
##   0.3  1          0.8               1.00       150      0.6196168  0.4290899
##   0.3  2          0.6               0.50        50      0.6099342  0.4145862
##   0.3  2          0.6               0.50       100      0.5929488  0.3891966
##   0.3  2          0.6               0.50       150      0.5689245  0.3531501
##   0.3  2          0.6               0.75        50      0.6292417  0.4434909
##   0.3  2          0.6               0.75       100      0.5906696  0.3857802
##   0.3  2          0.6               0.75       150      0.5870415  0.3803804
##   0.3  2          0.6               1.00        50      0.6039678  0.4057811
##   0.3  2          0.6               1.00       100      0.5906553  0.3857620
##   0.3  2          0.6               1.00       150      0.5942267  0.3910807
##   0.3  2          0.8               0.50        50      0.6039244  0.4057016
##   0.3  2          0.8               0.50       100      0.5761530  0.3639944
##   0.3  2          0.8               0.50       150      0.5724665  0.3585985
##   0.3  2          0.8               0.75        50      0.6039531  0.4055833
##   0.3  2          0.8               0.75       100      0.6002372  0.4000583
##   0.3  2          0.8               0.75       150      0.5846749  0.3767240
##   0.3  2          0.8               1.00        50      0.6136357  0.4202235
##   0.3  2          0.8               1.00       100      0.5991044  0.3983476
##   0.3  2          0.8               1.00       150      0.5846602  0.3767562
##   0.3  3          0.6               0.50        50      0.5942855  0.3913507
##   0.3  3          0.6               0.50       100      0.5712911  0.3567411
##   0.3  3          0.6               0.50       150      0.5737742  0.3604833
##   0.3  3          0.6               0.75        50      0.5954746  0.3930137
##   0.3  3          0.6               0.75       100      0.5617246  0.3423867
##   0.3  3          0.6               0.75       150      0.5628427  0.3440744
##   0.3  3          0.6               1.00        50      0.6087731  0.4128443
##   0.3  3          0.6               1.00       100      0.5894512  0.3838086
##   0.3  3          0.6               1.00       150      0.5737591  0.3603860
##   0.3  3          0.8               0.50        50      0.5893928  0.3838024
##   0.3  3          0.8               0.50       100      0.5616805  0.3422001
##   0.3  3          0.8               0.50       150      0.5761684  0.3640110
##   0.3  3          0.8               0.75        50      0.5929352  0.3891028
##   0.3  3          0.8               0.75       100      0.5737004  0.3603453
##   0.3  3          0.8               0.75       150      0.5520273  0.3278311
##   0.3  3          0.8               1.00        50      0.6063487  0.4092710
##   0.3  3          0.8               1.00       100      0.5942991  0.3912471
##   0.3  3          0.8               1.00       150      0.5725683  0.3585048
##   0.4  1          0.6               0.50        50      0.6171631  0.4252934
##   0.4  1          0.6               0.50       100      0.6123725  0.4182555
##   0.4  1          0.6               0.50       150      0.6219827  0.4327167
##   0.4  1          0.6               0.75        50      0.6292267  0.4434681
##   0.4  1          0.6               0.75       100      0.6135185  0.4199619
##   0.4  1          0.6               0.75       150      0.6135045  0.4199378
##   0.4  1          0.6               1.00        50      0.6231879  0.4344420
##   0.4  1          0.6               1.00       100      0.6183973  0.4272799
##   0.4  1          0.6               1.00       150      0.6171925  0.4254782
##   0.4  1          0.8               0.50        50      0.6365140  0.4543527
##   0.4  1          0.8               0.50       100      0.6219383  0.4326291
##   0.4  1          0.8               0.50       150      0.6135192  0.4199813
##   0.4  1          0.8               0.75        50      0.6268891  0.4399829
##   0.4  1          0.8               0.75       100      0.6196308  0.4291595
##   0.4  1          0.8               0.75       150      0.6208062  0.4309143
##   0.4  1          0.8               1.00        50      0.6219974  0.4326345
##   0.4  1          0.8               1.00       100      0.6195874  0.4290171
##   0.4  1          0.8               1.00       150      0.6159876  0.4236622
##   0.4  2          0.6               0.50        50      0.6039244  0.4055916
##   0.4  2          0.6               0.50       100      0.5772722  0.3657056
##   0.4  2          0.6               0.50       150      0.5785641  0.3676178
##   0.4  2          0.6               0.75        50      0.6002221  0.4001511
##   0.4  2          0.6               0.75       100      0.5893777  0.3839679
##   0.4  2          0.6               0.75       150      0.5676756  0.3514776
##   0.4  2          0.6               1.00        50      0.6063487  0.4093232
##   0.4  2          0.6               1.00       100      0.5809149  0.3711871
##   0.4  2          0.6               1.00       150      0.5773145  0.3657784
##   0.4  2          0.8               0.50        50      0.5978118  0.3965031
##   0.4  2          0.8               0.50       100      0.5627395  0.3438580
##   0.4  2          0.8               0.50       150      0.5701436  0.3549773
##   0.4  2          0.8               0.75        50      0.6063033  0.4092393
##   0.4  2          0.8               0.75       100      0.5725669  0.3586612
##   0.4  2          0.8               0.75       150      0.5749493  0.3622271
##   0.4  2          0.8               1.00        50      0.6111971  0.4165542
##   0.4  2          0.8               1.00       100      0.5918167  0.3873950
##   0.4  2          0.8               1.00       150      0.5725687  0.3585066
##   0.4  3          0.6               0.50        50      0.5726114  0.3586418
##   0.4  3          0.6               0.50       100      0.5738032  0.3605582
##   0.4  3          0.6               0.50       150      0.5508959  0.3261357
##   0.4  3          0.6               0.75        50      0.5737301  0.3603060
##   0.4  3          0.6               0.75       100      0.5640625  0.3458413
##   0.4  3          0.6               0.75       150      0.5591992  0.3386795
##   0.4  3          0.6               1.00        50      0.5905539  0.3855412
##   0.4  3          0.6               1.00       100      0.5749493  0.3622197
##   0.4  3          0.6               1.00       150      0.5580374  0.3367264
##   0.4  3          0.8               0.50        50      0.5675731  0.3511786
##   0.4  3          0.8               0.50       100      0.5278560  0.2915479
##   0.4  3          0.8               0.50       150      0.5434896  0.3149551
##   0.4  3          0.8               0.75        50      0.5724228  0.3585771
##   0.4  3          0.8               0.75       100      0.5580220  0.3368919
##   0.4  3          0.8               0.75       150      0.5580227  0.3369028
##   0.4  3          0.8               1.00        50      0.5942848  0.3911500
##   0.4  3          0.8               1.00       100      0.5809153  0.3710749
##   0.4  3          0.8               1.00       150      0.5652663  0.3478006
## 
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning
##  parameter 'min_child_weight' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were nrounds = 50, max_depth = 1, eta
##  = 0.4, gamma = 0, colsample_bytree = 0.8, min_child_weight = 1 and subsample
##  = 0.5.
print(rf_model_IMDB_Category_cv)
## Random Forest 
## 
## 828 samples
##  26 predictor
##   3 classes: 'Excellent', 'Good', 'Poor' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 745, 745, 745, 745, 746, 746, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##   2     0.4866735  0.1274336
##   4     0.4842345  0.1622359
##   6     0.4661328  0.1441397
##   8     0.4552160  0.1360659
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Evaluate Critic Score Model
true_critic_scores <- test_data$Critic_score_category
predicted_critic_scores <- predict(xgb_model_Critic_score_cv, newdata = test_data)
confusion_matrix_critic <- confusionMatrix(
  factor(predicted_critic_scores, levels = levels(true_critic_scores)),
  factor(true_critic_scores)
)
print(confusion_matrix_critic)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Moderate Popular Unpopular
##   Moderate        29      13        18
##   Popular         39      82        41
##   Unpopular       43      25        66
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4972          
##                  95% CI : (0.4441, 0.5504)
##     No Information Rate : 0.3511          
##     P-Value [Acc > NIR] : 1.118e-08       
##                                           
##                   Kappa : 0.2403          
##                                           
##  Mcnemar's Test P-Value : 5.543e-06       
## 
## Statistics by Class:
## 
##                      Class: Moderate Class: Popular Class: Unpopular
## Sensitivity                  0.26126         0.6833           0.5280
## Specificity                  0.87347         0.6610           0.7056
## Pos Pred Value               0.48333         0.5062           0.4925
## Neg Pred Value               0.72297         0.8041           0.7342
## Prevalence                   0.31180         0.3371           0.3511
## Detection Rate               0.08146         0.2303           0.1854
## Detection Prevalence         0.16854         0.4551           0.3764
## Balanced Accuracy            0.56737         0.6722           0.6168
# Evaluate Gross Category Model
true_gross_category <- test_data$Log_Worldwide_Gross_Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = test_data)
confusion_matrix_gross <- confusionMatrix(
  factor(predicted_gross_category, levels = levels(true_gross_category)),
  factor(true_gross_category)
)
print(confusion_matrix_gross)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High's Low's Medium
##     High's     84     4     30
##     Low's      11    78     28
##     Medium     28    32     61
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6264          
##                  95% CI : (0.5739, 0.6768)
##     No Information Rate : 0.3455          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4396          
##                                           
##  Mcnemar's Test P-Value : 0.3077          
## 
## Statistics by Class:
## 
##                      Class: High's Class: Low's Class: Medium
## Sensitivity                 0.6829       0.6842        0.5126
## Specificity                 0.8541       0.8388        0.7468
## Pos Pred Value              0.7119       0.6667        0.5041
## Neg Pred Value              0.8361       0.8494        0.7532
## Prevalence                  0.3455       0.3202        0.3343
## Detection Rate              0.2360       0.2191        0.1713
## Detection Prevalence        0.3315       0.3287        0.3399
## Balanced Accuracy           0.7685       0.7615        0.6297
# Evaluate IMDB Category Model
true_imdb_category <- test_data$IMDB_Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = test_data)
confusion_matrix_imdb <- confusionMatrix(
  factor(predicted_imdb_category, levels = levels(true_imdb_category)),
  factor(true_imdb_category)
)
print(confusion_matrix_imdb)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Excellent Good Poor
##   Excellent        11   15    3
##   Good             61  131   97
##   Poor              2   17   19
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4522          
##                  95% CI : (0.3997, 0.5056)
##     No Information Rate : 0.4579          
##     P-Value [Acc > NIR] : 0.6043          
##                                           
##                   Kappa : 0.0485          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
## 
## Statistics by Class:
## 
##                      Class: Excellent Class: Good Class: Poor
## Sensitivity                   0.14865      0.8037     0.15966
## Specificity                   0.93617      0.1813     0.91983
## Pos Pred Value                0.37931      0.4533     0.50000
## Neg Pred Value                0.80734      0.5224     0.68553
## Prevalence                    0.20787      0.4579     0.33427
## Detection Rate                0.03090      0.3680     0.05337
## Detection Prevalence          0.08146      0.8118     0.10674
## Balanced Accuracy             0.54241      0.4925     0.53975
## Cruella 2021, moderate film 

# Load required libraries
library(caret)
library(nnet)  # For multinomial logistic regression
library(xgboost)  # For XGBoost


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(200000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 1,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                    High's                     Good
## Mank 2020, loss making movie, rating were good 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(25000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Don't worry Darling 2022, Moderate as per rating, high revenue 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(35000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 1,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    Medium                     Good
## Minari 2021, Good in critic and hit revenue  

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(2000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 1,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Cats 2019


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(95000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 1
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Poor
#  Ford v Ferrari 2019


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(97600000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 1,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                Excellent
#  Alita: Battle Angel 2019


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(170000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 5,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 1,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
# Black Christmas 2019


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(5000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 4,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
# Ready or Not 2019



# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(6000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 4,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
# Magic Mike's Last Dance 2023


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(45000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
# Tyler Perry's A Madea Family Funeral 2019


# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(20000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    Medium                     Poor
# The little things 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(30000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 1,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Those Who Wish Me Dead 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(20000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                     Low's                     Good
## The batman 2022 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(185000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 4,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## The outfit 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(5000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 4,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 1,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                     Low's                     Good
## Call of the wild 2020

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(109000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                    High's                     Poor
## Nomadland 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(5000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 1,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Promising young women 2020

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(10000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action =0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 1,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Parasite 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(11000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Joker 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(55000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 1,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## 1917 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(100000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                    High's                     Good
## Oppenheimer 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(100000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                Excellent
## Spider-man No way home 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(200000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Top Gun: Maverick 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(170000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Avatar: The way of Water 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(460000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Barbie 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(145000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
## Guardians of Galaxy Vol 3

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(250000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Once upon the time in Hollywood 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(95000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                    High's                     Good
## The lighthouse 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(11000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Midsommar 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(9000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 0,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Creed 3 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(75000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Tenet 2020

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(205000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
## Dune 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(165000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## No time to die 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(250000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Elvis 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(85000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                Excellent
## Mission impossible 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(291000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
## The super Mario Bros 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(100000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 1,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Moderate"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                Moderate                    High's                     Good
## Killers of the moon 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(200000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 1,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
## Doctor Sleep 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(45000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Excellent"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                Excellent
## The french Dispatch 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(25000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## minions: The rise of Gru 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(85000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 1,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
## Past lives 2023  

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(12000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## The Holdovers 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(13000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Jojo Rabbit 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(14000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Possessor 2020

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(2500000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Titane 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(6600000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 1,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## The green knight 2021

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(15000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Nope 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(68000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Infinity Pool 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(4500000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Beau is Afraid 2023

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(35000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    Medium                     Good
## Dolittle 2020

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(175000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Poor
## Morbuis 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(75000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Firestarter 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(12000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    Medium                     Good
## The 355 2022

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(75000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 2,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Winnie the pooh: Blood and Honey 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(100000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 0,
  Less.than.90 = 1,
  Spring = 0,
  Summer = 0,
  Fall = 0,
  Winter = 1,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Poor"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Poor
## Us 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(20000000),  # Example: Log of budget
  PG.13 = 0,
  R = 1,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 1,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Medium"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    Medium                     Good
##  Godzilla: King of the Monsters

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(170000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Capain Marvel 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(152000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## Ad Astra 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(87500000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 0,
  Fall = 1,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## The Farewell 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(250300),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 2,
  Main_Action = 0,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 1,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  Low's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                     Low's                     Good
## Avengers: Endgame 2019

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(356000000),  # Example: Log of budget
  PG.13 = 1,
  R = 0,
  PG = 0,
  G = 0,
  between_90_to_135 = 0,
  Greater_than_135 = 1,
  Less.than.90 = 0,
  Spring = 1,
  Summer = 0,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 1,
  Main_Adventure = 0,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Unpopular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1               Unpopular                    High's                     Good
## The Lion King 2019 

# Input data (this can be obtained through a CSV or input prompts)
input_data <- data.frame(
  Log_production_budget_adj = log(260000000),  # Example: Log of budget
  PG.13 = 0,
  R = 0,
  PG = 1,
  G = 0,
  between_90_to_135 = 1,
  Greater_than_135 = 0,
  Less.than.90 = 0,
  Spring = 0,
  Summer = 1,
  Fall = 0,
  Winter = 0,
  genre_count = 3,
  Main_Action = 0,
  Main_Adventure = 1,
  Main_Animation = 0,
  Main_Comedy = 0,
  Main_Crime = 0,
  Main_Documentary = 0,
  Main_Drama = 0,
  Main_Family = 0,
  Main_Fantasy = 0,
  Main_Horror = 0,
  Main_Mystery = 0,
  Main_History = 0,
  Main_Romance = 0,
  Main_Science_Fiction = 0,
  Main_Thriller = 0,
  Other_Genres = 0
)


# Predict with the Polynomial Logistic Regression Model for Popularity
predicted_Critic_score <- predict(xgb_model_Critic_score_cv, newdata = input_data)
print(paste("Predicted Critic score Category: ", predicted_Critic_score))
## [1] "Predicted Critic score Category:  Popular"
# Predict with the Polynomial Logistic Regression Model for Gross Category
predicted_gross_category <- predict(xgb_model_Gross_Category_cv, newdata = input_data)
print(paste("Predicted Gross Category: ", predicted_gross_category))
## [1] "Predicted Gross Category:  High's"
# Predict with the XGBoost Model for IMDB Category
predicted_imdb_category <- predict(rf_model_IMDB_Category_cv, newdata = input_data)
print(paste("Predicted IMDB Category: ", predicted_imdb_category))
## [1] "Predicted IMDB Category:  Good"
# Output the results in a structured format (e.g., CSV or simple printout)
predictions <- data.frame(
  Critic_score_Prediction = predicted_Critic_score,
  Gross_Category_Prediction = predicted_gross_category,
  IMDB_Category_Prediction = predicted_imdb_category
)

# Print the results to console
print(predictions)
##   Critic_score_Prediction Gross_Category_Prediction IMDB_Category_Prediction
## 1                 Popular                    High's                     Good
# # Optionally save the results to a CSV
# write.csv(data, "fdata.csv", row.names = FALSE)
#